@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +82 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/actions/transcription-control.d.ts +29 -0
- package/src/actions/transcription-control.d.ts.map +1 -0
- package/src/actions/transcription-control.test.ts +100 -0
- package/src/actions/transcription-control.ts +127 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +8 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +62 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1082 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +205 -0
- package/src/routes/local-inference-asr-route.ts +163 -0
- package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
- package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
- package/src/routes/local-inference-asr-transcribe.ts +97 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +485 -0
- package/src/routes/local-inference-compat-routes.ts +808 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/transcript-audio-store.d.ts +15 -0
- package/src/routes/transcript-audio-store.d.ts.map +1 -0
- package/src/routes/transcript-audio-store.ts +27 -0
- package/src/routes/transcripts-routes.d.ts +36 -0
- package/src/routes/transcripts-routes.d.ts.map +1 -0
- package/src/routes/transcripts-routes.test.ts +144 -0
- package/src/routes/transcripts-routes.ts +159 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1448 -0
- package/src/runtime/index.d.ts +15 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +33 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bionic-host-loader.d.ts +46 -0
- package/src/services/bionic-host-loader.d.ts.map +1 -0
- package/src/services/bionic-host-loader.test.ts +133 -0
- package/src/services/bionic-host-loader.ts +180 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +238 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +747 -0
- package/src/services/downloader.ts +925 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +540 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1909 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.d.ts +56 -0
- package/src/services/gpu-detect.d.ts.map +1 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +231 -0
- package/src/services/hardware.ts +410 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +277 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +29 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +211 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +689 -0
- package/src/services/manifest/schema.d.ts +713 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +653 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +567 -0
- package/src/services/memory-arbiter.d.ts +318 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +925 -0
- package/src/services/memory-monitor.d.ts +122 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +297 -0
- package/src/services/memory-pressure.d.ts +130 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +414 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +671 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +407 -0
- package/src/services/routing-policy.d.ts +69 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.test.ts +164 -0
- package/src/services/routing-policy.ts +297 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +17 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/system-memory.d.ts +33 -0
- package/src/services/system-memory.d.ts.map +1 -0
- package/src/services/system-memory.test.ts +47 -0
- package/src/services/system-memory.ts +67 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/asr-timed.real.test.ts +141 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +131 -0
- package/src/services/voice/embedding.ts +243 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +759 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2302 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +674 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +728 -0
- package/src/services/voice/ffi-bindings.ts +3225 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/real-audio-decode.test.ts +148 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.test.ts +129 -0
- package/src/services/voice/ring-buffer.ts +123 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/transcript-knowledge.d.ts +37 -0
- package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/src/services/voice/transcript-knowledge.test.ts +68 -0
- package/src/services/voice/transcript-knowledge.ts +75 -0
- package/src/services/voice/transcript-service.d.ts +41 -0
- package/src/services/voice/transcript-service.d.ts.map +1 -0
- package/src/services/voice/transcript-service.test.ts +137 -0
- package/src/services/voice/transcript-service.ts +141 -0
- package/src/services/voice/transcript-store.d.ts +53 -0
- package/src/services/voice/transcript-store.d.ts.map +1 -0
- package/src/services/voice/transcript-store.test.ts +153 -0
- package/src/services/voice/transcript-store.ts +132 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +418 -0
- package/src/services/voice/voice-budget.ts +635 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Conversation registry for the local-inference path.
|
|
3
|
+
*
|
|
4
|
+
* Today's slot allocation is purely a hash function: `deriveSlotId` maps a
|
|
5
|
+
* `promptCacheKey` (or any stable string) to `slot_id` in `[0, parallel)`.
|
|
6
|
+
* That works for one-shot calls but breaks for long agentic loops:
|
|
7
|
+
*
|
|
8
|
+
* - Two distinct conversations whose cache keys hash to the same slot
|
|
9
|
+
* evict each other's KV every turn (slot thrashing).
|
|
10
|
+
* - The current high-water mark of concurrent conversations is invisible,
|
|
11
|
+
* so `--parallel N` cannot be tuned to fit.
|
|
12
|
+
* - There is no notion of an explicit "I am still using this slot" lease,
|
|
13
|
+
* so eviction is purely best-effort.
|
|
14
|
+
*
|
|
15
|
+
* This registry keeps a per-conversation reservation. `openConversation`
|
|
16
|
+
* picks the lowest-loaded slot and pins the conversation to it; subsequent
|
|
17
|
+
* `generateInConversation` calls always land on the same slot. When the
|
|
18
|
+
* pool is full, slot reuse falls back to the same-as-before hash policy
|
|
19
|
+
* (two leases on the same slot still serialise correctly via the dispatcher's
|
|
20
|
+
* generation queue).
|
|
21
|
+
*
|
|
22
|
+
* The registry tracks the high-water mark of concurrently-open conversations
|
|
23
|
+
* so the engine can warn, or later restart llama-server with a higher
|
|
24
|
+
* --parallel, when the load outgrows the configured slot count.
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
import { createHash } from "node:crypto";
|
|
28
|
+
|
|
29
|
+
/**
 * Opaque handle returned by `openConversation`. Callers MUST treat this as
 * opaque — the registry owns the slot id and lifetime.
 */
export interface ConversationHandle {
  readonly conversationId: string;
  readonly modelId: string;
  /**
   * Pinned slot id in `[0, parallel)`. The registry never assigns a negative
   * slot: when `parallel` is omitted, non-finite, or below 2, every
   * conversation is pinned to slot `0`. Used by both backends as the cache key:
   *   - llama-server: forwarded as `slot_id` in the request payload.
   *   - node-llama-cpp: combined with the conversation id to derive the
   *     session-pool key so identical conversations share a session.
   */
  readonly slotId: number;
  /** Wall-clock ms when the handle was opened. */
  readonly openedAtMs: number;
  /** Wall-clock ms when the handle was last touched (open or lookup). */
  lastUsedMs: number;
  /** TTL after which the registry MAY auto-close on the next sweep. */
  readonly ttlMs: number;
  /** True once the handle has been closed or evicted; further use is rejected. */
  closed: boolean;
}

export interface OpenConversationArgs {
  conversationId: string;
  modelId: string;
  /**
   * Slot count from the running server (`--parallel N`). Defaults to 1.
   * Fractional values are floored; non-finite or non-positive values are
   * treated as 1.
   */
  parallel?: number;
  /**
   * TTL after which the handle is considered idle and may be auto-closed
   * by `evictIdle`. Defaults to 60 minutes — long enough for an LLM call
   * to finish even on a slow drafter, short enough to recover from forgotten
   * close calls within the long-cache window.
   */
  ttlMs?: number;
}

const DEFAULT_HANDLE_TTL_MS = 60 * 60 * 1000;

/**
 * In-memory registry of open conversation handles. A single instance is
 * shared by the engine; each backend reads from it on every generate to
 * decide which slot to pin to.
 */
export class ConversationRegistry {
  /** Open handles keyed by `compositeKey(conversationId, modelId)`. */
  private readonly handles = new Map<string, ConversationHandle>();
  /** Per-slot reference count; lowest-loaded slot wins on next open. */
  private readonly slotLoad = new Map<number, number>();
  /** Largest concurrent open count seen; the engine reads this for parallel auto-tune. */
  private highWaterMark = 0;

  /**
   * Lookup / open a conversation handle. Idempotent for the same
   * conversation id + model id; callers can call this on every turn
   * without leaking handles. When the call is reusing an existing handle,
   * `lastUsedMs` is bumped for LRU-style eviction tracking and the other
   * arguments (`parallel`, `ttlMs`) are ignored.
   *
   * @throws Error when `conversationId` or `modelId` is empty.
   */
  open(args: OpenConversationArgs): ConversationHandle {
    if (!args.conversationId) {
      throw new Error("[conversation-registry] conversationId is required");
    }
    if (!args.modelId) {
      throw new Error("[conversation-registry] modelId is required");
    }
    const compositeKey = this.compositeKey(args.conversationId, args.modelId);
    const existing = this.handles.get(compositeKey);
    if (existing && !existing.closed) {
      existing.lastUsedMs = Date.now();
      return existing;
    }

    // Require a finite slot count: `Infinity` would otherwise survive
    // `Math.floor` and make the slot scan below loop forever.
    const requested = args.parallel;
    const parallel =
      typeof requested === "number" &&
      Number.isFinite(requested) &&
      requested >= 1
        ? Math.floor(requested)
        : 1;
    const slotId = this.pickLowestLoadedSlot(parallel, args.conversationId);
    const now = Date.now();
    const handle: ConversationHandle = {
      conversationId: args.conversationId,
      modelId: args.modelId,
      slotId,
      openedAtMs: now,
      lastUsedMs: now,
      ttlMs: args.ttlMs ?? DEFAULT_HANDLE_TTL_MS,
      closed: false,
    };
    this.handles.set(compositeKey, handle);
    this.slotLoad.set(slotId, (this.slotLoad.get(slotId) ?? 0) + 1);
    if (this.handles.size > this.highWaterMark) {
      this.highWaterMark = this.handles.size;
    }
    return handle;
  }

  /**
   * Lookup an open handle by conversation+model. Returns null when the
   * conversation has not been opened or has already been closed. Bumps
   * `lastUsedMs` so an LRU sweep treats reads as activity.
   */
  get(conversationId: string, modelId: string): ConversationHandle | null {
    const handle = this.handles.get(this.compositeKey(conversationId, modelId));
    if (!handle || handle.closed) return null;
    handle.lastUsedMs = Date.now();
    return handle;
  }

  /**
   * Close + drop a handle. Idempotent — closing an unknown / already-closed
   * handle has no additional effect, so callers can call this from cleanup
   * paths unconditionally.
   */
  close(conversationId: string, modelId: string): void {
    const compositeKey = this.compositeKey(conversationId, modelId);
    const handle = this.handles.get(compositeKey);
    if (!handle) return;
    this.drop(compositeKey, handle);
  }

  /**
   * Sweep handles whose `lastUsedMs` is older than their TTL. Returns the
   * conversation ids dropped so callers can persist final KV state to
   * disk, etc. Safe to call on a timer.
   *
   * @param now Reference time in wall-clock ms; defaults to `Date.now()`.
   */
  evictIdle(now: number = Date.now()): string[] {
    const dropped: string[] = [];
    // Deleting the current entry while iterating a Map is well-defined in JS.
    for (const [compositeKey, handle] of this.handles) {
      if (now - handle.lastUsedMs > handle.ttlMs) {
        this.drop(compositeKey, handle);
        dropped.push(handle.conversationId);
      }
    }
    return dropped;
  }

  /**
   * Snapshot every currently-open handle. Used by the shutdown path to
   * emit a save-state request per slot.
   */
  snapshot(): readonly ConversationHandle[] {
    return [...this.handles.values()];
  }

  /** Largest concurrent open count seen since the registry was created. */
  highWater(): number {
    return this.highWaterMark;
  }

  /** Number of currently-open handles. */
  size(): number {
    return this.handles.size;
  }

  /**
   * Recommended `--parallel` slot count: the observed high-water mark of
   * concurrently-open conversations plus headroom of `max(2, 25%)`, never
   * lower than `running`. The engine's auto-tune (J4) compares this against
   * the running server's slot count: when this is larger AND there's RAM
   * headroom, it restarts llama-server with the higher value so new
   * conversations get their own KV slots instead of thrashing.
   *
   * `running` is the currently-configured slot count; when it already covers
   * high-water mark + headroom this returns `running` unchanged, so callers
   * can test for equality without a second branch. Because the headroom is at
   * least 2, the result is at least 2 even before any conversation was opened.
   */
  recommendedParallel(running: number): number {
    const headroom = Math.max(2, Math.ceil(this.highWaterMark * 0.25));
    const desired = Math.max(1, this.highWaterMark + headroom);
    return Math.max(running, desired);
  }

  /**
   * Drop every handle and reset the high-water mark + slot-load bookkeeping.
   * Test-only — the module singleton leaks state across files when the suite
   * runs together; call this in `beforeEach` to isolate. Not part of the
   * runtime contract.
   */
  __resetForTests(): void {
    for (const handle of this.handles.values()) handle.closed = true;
    this.handles.clear();
    this.slotLoad.clear();
    this.highWaterMark = 0;
  }

  /**
   * Pick the slot with the fewest open handles. A completely free slot wins
   * immediately (lowest index first). When every slot is occupied, the choice
   * is restricted to the least-loaded slots and ties among them are broken by
   * a deterministic hash of the conversation id, so the same conversation
   * lands on the same slot when reopened under the same load picture.
   */
  private pickLowestLoadedSlot(
    parallel: number,
    conversationId: string,
  ): number {
    if (parallel <= 1) return 0;
    let lowestLoad = Number.POSITIVE_INFINITY;
    let candidates: number[] = [];
    for (let slot = 0; slot < parallel; slot += 1) {
      const load = this.slotLoad.get(slot) ?? 0;
      if (load === 0) return slot;
      if (load < lowestLoad) {
        lowestLoad = load;
        candidates = [slot];
      } else if (load === lowestLoad) {
        candidates.push(slot);
      }
    }
    // Pool is full. When all slots are equally loaded `candidates` is
    // `[0, parallel)` and this reduces to the plain `hash % parallel` policy.
    const digest = createHash("sha256").update(conversationId).digest();
    return candidates[digest.readUInt32BE(0) % candidates.length] ?? 0;
  }

  /** Mark a handle closed, forget it, and release its slot reference. */
  private drop(compositeKey: string, handle: ConversationHandle): void {
    handle.closed = true;
    this.handles.delete(compositeKey);
    const remaining = (this.slotLoad.get(handle.slotId) ?? 0) - 1;
    if (remaining <= 0) {
      this.slotLoad.delete(handle.slotId);
    } else {
      this.slotLoad.set(handle.slotId, remaining);
    }
  }

  /** Registry key: handles are unique per (model, conversation) pair. */
  private compositeKey(conversationId: string, modelId: string): string {
    return `${modelId}::${conversationId}`;
  }
}

/**
 * Module-singleton registry. The engine reads this on every generate; the
 * conversation lifecycle API (`openConversation`, `closeConversation`)
 * mutates it.
 */
export const conversationRegistry = new ConversationRegistry();
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Desktop production `FfiBackendRuntime` over the FUSED `libelizainference` —
|
|
3
|
+
* the SOLE desktop text runtime now that libllama has been retired.
|
|
4
|
+
*
|
|
5
|
+
* Desktop text generation runs through the fused library: the same
|
|
6
|
+
* `eliza_inference_llm_stream_*` ABI (v9) the voice subsystem already loads,
|
|
7
|
+
* so text + voice share one native lib, one GGML pin, and one resident text
|
|
8
|
+
* model.
|
|
9
|
+
*
|
|
10
|
+
* - The fused lib's `eliza_inference_llm_stream_open` loads the bundle's text
|
|
11
|
+
* GGUF (`<bundleRoot>/text/*.gguf`) and applies same-file MTP speculative
|
|
12
|
+
* decoding + KV-cache quant + per-load GPU layers natively (ABI v9). The
|
|
13
|
+
* path is gated on the capability probes
|
|
14
|
+
* (`llmStreamSupported && llmMtpSupported && llmKvQuantSupported`).
|
|
15
|
+
* - A fused lib that lacks MTP / KV-quant / native tokenize is REFUSED by
|
|
16
|
+
* `supported()` → the engine raises LocalInferenceUnavailable. There is no
|
|
17
|
+
* libllama fallback and never an unoptimized fused loop.
|
|
18
|
+
*
|
|
19
|
+
* Tokenization runs over the fused handle's resident text vocab via ABI-v9
|
|
20
|
+
* `eliza_inference_tokenize`: the fused `create()` + first `llmStreamOpen`
|
|
21
|
+
* already made the text vocab resident, so no second model is loaded.
|
|
22
|
+
* `tokenizeSupported()` gates this; a pre-v9 lib without the symbol is refused.
|
|
23
|
+
*
|
|
24
|
+
* Lifecycle: one fused context per loaded model; `acquire()` builds it,
|
|
25
|
+
* `release()` tears it down. A throwing native free poisons the runtime so no
|
|
26
|
+
* new allocation happens over leaked resources.
|
|
27
|
+
*/
|
|
28
|
+
import type { BackendPlan } from "./backend";
|
|
29
|
+
import type { FfiBackendRuntime, FfiBackendSession } from "./ffi-streaming-backend";
|
|
30
|
+
/**
|
|
31
|
+
* Resolve the on-disk path to the fused `libelizainference`. Precedence:
|
|
32
|
+
* 1. `ELIZA_INFERENCE_LIBRARY` — an explicit absolute path.
|
|
33
|
+
* 2. `<bundleRoot>/lib/<name>` — the bundle-local lib.
|
|
34
|
+
* 3. `ELIZA_INFERENCE_LIB_DIR/<name>` — an explicit lib directory.
|
|
35
|
+
* 4. `<stateDir>/local-inference/lib/<name>` — the default staging dir written
|
|
36
|
+
* by `scripts/stage-desktop-fused-lib.mjs`, so a staged desktop build is
|
|
37
|
+
* found with no env wiring.
|
|
38
|
+
* Returns null when none of the candidates exist on disk — `supported()` then
|
|
39
|
+
* reports unavailable and the engine raises LocalInferenceUnavailable.
|
|
40
|
+
*/
|
|
41
|
+
export declare function resolveFusedLibraryPath(bundleRoot: string | null, env?: NodeJS.ProcessEnv): string | null;
|
|
42
|
+
export declare class DesktopFusedFfiBackendRuntime implements FfiBackendRuntime {
|
|
43
|
+
private active;
|
|
44
|
+
private poisonedError;
|
|
45
|
+
/** Cached `supported()` result so the engine gate and the dispatcher agree. */
|
|
46
|
+
private supportedCache;
|
|
47
|
+
/**
|
|
48
|
+
* Viable only when:
|
|
49
|
+
* - bun:ffi resolves on the current runtime,
|
|
50
|
+
* - the fused dylib is present AND reports ABI-v9 capability: the
|
|
51
|
+
* streaming-LLM surface, same-file MTP, KV-cache quant, AND native
|
|
52
|
+
* tokenization (`eliza_inference_tokenize`).
|
|
53
|
+
* A pre-v9 fused lib reports the probes as unsupported → refused, and the
|
|
54
|
+
* engine raises LocalInferenceUnavailable. libllama has been retired; there
|
|
55
|
+
* is no fallback runtime and no tokenizer sidecar.
|
|
56
|
+
*/
|
|
57
|
+
supported(): boolean;
|
|
58
|
+
/** Clear the cached `supported()` result (tests / lib swaps). */
|
|
59
|
+
resetSupportedCache(): void;
|
|
60
|
+
private computeSupported;
|
|
61
|
+
/** Builds the fused context for the model described by `plan` (one fused context per loaded model) and resolves to its session. */
acquire(plan: BackendPlan): Promise<FfiBackendSession>;
|
|
62
|
+
/** NOTE(review): presumably the number of parallel generation slots this runtime exposes to the dispatcher — confirm against the implementation. */
parallelSlots(): number;
|
|
63
|
+
/**
|
|
64
|
+
* Whether the LIVE session can describe images through the fused
|
|
65
|
+
* `eliza_inference_describe_image`. Mirrors the FfiStreamingBackend gate:
|
|
66
|
+
* true only when a session is bound and the fused lib exposes vision.
|
|
67
|
+
*/
|
|
68
|
+
visionSupported(): boolean;
|
|
69
|
+
/**
|
|
70
|
+
* Vision describe through the fused `eliza_inference_describe_image`
|
|
71
|
+
* (ABI v9). Reuses the mtmd machinery linked for ASR over the bundle's text
|
|
72
|
+
* model + the passed mmproj projector. The `FfiStreamingBackend` forwards
|
|
73
|
+
* `describeImage`/`visionSupported` to this runtime by duck-typing.
|
|
74
|
+
*/
|
|
75
|
+
describeImage(args: {
|
|
76
|
+
imageBytes: Uint8Array;
|
|
77
|
+
mmprojPath: string;
|
|
78
|
+
prompt?: string;
|
|
79
|
+
maxTokens?: number;
|
|
80
|
+
temperature?: number;
|
|
81
|
+
signal?: AbortSignal;
|
|
82
|
+
}): Promise<{
|
|
83
|
+
text: string;
|
|
84
|
+
projectorMs?: number;
|
|
85
|
+
decodeMs?: number;
|
|
86
|
+
}>;
|
|
87
|
+
/** Tears down the fused context built by `acquire()`. Per the file header, a throwing native free poisons the runtime so no new allocation happens over leaked resources. */
release(): Promise<void>;
|
|
88
|
+
}
|
|
89
|
+
/**
|
|
90
|
+
* Process singleton — the engine wires this as the sole `FfiBackendRuntime` for
|
|
91
|
+
* the dispatcher's `"llama-cpp"` slot. The ABI-v9 capability probes in
|
|
92
|
+
* `supported()` gate whether the fused lib serves text at all.
|
|
93
|
+
*/
|
|
94
|
+
export declare const desktopFusedFfiBackendRuntime: DesktopFusedFfiBackendRuntime;
|
|
95
|
+
//# sourceMappingURL=desktop-fused-ffi-backend-runtime.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"desktop-fused-ffi-backend-runtime.d.ts","sourceRoot":"","sources":["desktop-fused-ffi-backend-runtime.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAOH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AAC7C,OAAO,KAAK,EACX,iBAAiB,EACjB,iBAAiB,EACjB,MAAM,yBAAyB,CAAC;AAsBjC;;;;;;;;;;GAUG;AACH,wBAAgB,uBAAuB,CACtC,UAAU,EAAE,MAAM,GAAG,IAAI,EACzB,GAAG,GAAE,MAAM,CAAC,UAAwB,GAClC,MAAM,GAAG,IAAI,CAgBf;AAmBD,qBAAa,6BAA8B,YAAW,iBAAiB;IACtE,OAAO,CAAC,MAAM,CAAmC;IACjD,OAAO,CAAC,aAAa,CAAsB;IAC3C,+EAA+E;IAC/E,OAAO,CAAC,cAAc,CAAwB;IAE9C;;;;;;;;;OASG;IACH,SAAS,IAAI,OAAO;IAMpB,iEAAiE;IACjE,mBAAmB,IAAI,IAAI;IAI3B,OAAO,CAAC,gBAAgB;IAsClB,OAAO,CAAC,IAAI,EAAE,WAAW,GAAG,OAAO,CAAC,iBAAiB,CAAC;IAgF5D,aAAa,IAAI,MAAM;IAMvB;;;;OAIG;IACH,eAAe,IAAI,OAAO;IAS1B;;;;;OAKG;IACG,aAAa,CAAC,IAAI,EAAE;QACzB,UAAU,EAAE,UAAU,CAAC;QACvB,UAAU,EAAE,MAAM,CAAC;QACnB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,MAAM,CAAC,EAAE,WAAW,CAAC;KACrB,GAAG,OAAO,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,WAAW,CAAC,EAAE,MAAM,CAAC;QAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IA4BhE,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAgB9B;AAED;;;;GAIG;AACH,eAAO,MAAM,6BAA6B,+BACN,CAAC"}
|