@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +81 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +7 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +54 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1171 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +190 -0
- package/src/routes/local-inference-asr-route.ts +213 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +423 -0
- package/src/routes/local-inference-compat-routes.ts +782 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1398 -0
- package/src/runtime/index.d.ts +14 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +27 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/asr/errors.d.ts +21 -0
- package/src/services/asr/errors.d.ts.map +1 -0
- package/src/services/asr/errors.ts +50 -0
- package/src/services/asr/hash.d.ts +28 -0
- package/src/services/asr/hash.d.ts.map +1 -0
- package/src/services/asr/hash.ts +49 -0
- package/src/services/asr/index.d.ts +76 -0
- package/src/services/asr/index.d.ts.map +1 -0
- package/src/services/asr/index.ts +178 -0
- package/src/services/asr/types.d.ts +91 -0
- package/src/services/asr/types.d.ts.map +1 -0
- package/src/services/asr/types.ts +95 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +240 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +724 -0
- package/src/services/downloader.ts +899 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +534 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1891 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +183 -0
- package/src/services/hardware.ts +404 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +281 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +30 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +225 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +693 -0
- package/src/services/manifest/schema.d.ts +715 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +655 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +569 -0
- package/src/services/memory-arbiter.d.ts +343 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +1000 -0
- package/src/services/memory-monitor.d.ts +119 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +296 -0
- package/src/services/memory-pressure.d.ts +127 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +413 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +672 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +376 -0
- package/src/services/routing-policy.d.ts +55 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.ts +228 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +15 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/tts/errors.ts +46 -0
- package/src/services/tts/index.ts +214 -0
- package/src/services/tts/tts-audio-cache.ts +235 -0
- package/src/services/tts/types.ts +157 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +148 -0
- package/src/services/voice/embedding.ts +244 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +746 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2226 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +636 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +671 -0
- package/src/services/voice/ffi-bindings.ts +3050 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.ts +105 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +420 -0
- package/src/services/voice/voice-budget.ts +656 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Soft cloud-fallback wrapper for local-inference TEXT_LARGE / TEXT_SMALL.
|
|
3
|
+
*
|
|
4
|
+
* Why this exists: on mobile (AOSP / iOS) the local llama backend has very
|
|
5
|
+
* different failure modes from a desktop process. The model GGUF may not be
|
|
6
|
+
* staged yet, the FFI dlopen may have failed, the device may be in low-power
|
|
7
|
+
* mode and refuse to prefill, or the user may have explicitly disabled the
|
|
8
|
+
* local engine. We do not want any of those states to surface as a
|
|
9
|
+
* "No handler found for delegate type: TEXT_LARGE" runtime error — when an
|
|
10
|
+
* Anthropic / OpenAI / Eliza Cloud handler is also registered, the runtime
|
|
11
|
+
* should transparently fall through to cloud.
|
|
12
|
+
*
|
|
13
|
+
* Design constraints (per AGENTS.md):
|
|
14
|
+
* - No silent try/catch. The wrapper distinguishes "ran successfully" from
|
|
15
|
+
* "ran and decided to fallback" via an EXPLICIT typed return:
|
|
16
|
+
* { kind: "ok"; text: string }
|
|
17
|
+
* | { kind: "fallback"; reason: FallbackReason }
|
|
18
|
+
* Callers branch on `kind`. The wrapper does NOT swallow errors —
|
|
19
|
+
* any unhandled throw bubbles up to the runtime.
|
|
20
|
+
* - Local errors are CLASSIFIED. Unrecoverable bugs (programming errors,
|
|
21
|
+
* out-of-memory, OS kill signals) propagate. Recoverable conditions
|
|
22
|
+
* (model not staged, abort, downstream provider transient) trigger
|
|
23
|
+
* fallback.
|
|
24
|
+
* - Cloud forwarding is registry-driven. We look up the next-highest
|
|
25
|
+
* priority handler from the runtime's model registry rather than
|
|
26
|
+
* hardcoding "anthropic" or "openai". That keeps the wrapper neutral
|
|
27
|
+
* to which cloud is paired.
|
|
28
|
+
*/
|
|
29
|
+
|
|
30
|
+
import type {
|
|
31
|
+
GenerateTextParams,
|
|
32
|
+
IAgentRuntime,
|
|
33
|
+
JsonValue,
|
|
34
|
+
ModelTypeName,
|
|
35
|
+
} from "@elizaos/core";
|
|
36
|
+
|
|
37
|
+
/**
 * Reason code attached to a `fallback` outcome. Each member names one way
 * the local path declined or failed to answer a request.
 */
export type FallbackReason =
	/** The local backend cannot serve the request at all (no model, FFI dlopen failure, and similar). */
	| "local-unavailable"
	/** The local backend was busy, queued beyond a deadline, or refused because of thermal / low-power state. */
	| "local-overloaded"
	/** The local backend failed while prefilling or decoding. */
	| "local-error"
	/** The caller cancelled before the local backend finished; cloud may still serve. */
	| "local-aborted-pre-completion"
	/** This runtime build has no local handler registered. */
	| "local-not-registered";
|
|
48
|
+
|
|
49
|
+
/**
 * Result of one local generate attempt. Callers branch on `kind`:
 *   - "ok"       — the local backend produced `text`.
 *   - "fallback" — the local backend did not answer; `reason` says why and
 *                  `cause`, when present, carries the underlying error.
 */
export type LocalGenerateOutcome =
	| {
			kind: "ok";
			text: string;
	  }
	| {
			kind: "fallback";
			reason: FallbackReason;
			cause?: Error;
	  };
|
|
52
|
+
|
|
53
|
+
/**
 * Decide whether a value thrown by the local backend is eligible for cloud
 * fallback or has to propagate as a hard failure.
 *
 * Matching is intentionally narrow. Checks run in this order and the first
 * hit wins:
 *   1. `name === "AbortError"`              -> no fallback, "local-aborted-pre-completion"
 *   2. `name === "KvSpillUnsupportedError"` -> no fallback, "local-error"
 *   3. message names a missing model / binding     -> fallback, "local-unavailable"
 *   4. message names slot exhaustion, thermal or low-power -> fallback, "local-overloaded"
 *   5. message names a llama.cpp / ggml failure    -> fallback, "local-error"
 * Anything else, including values that are not `Error` instances, yields
 * `{ fallback: false, reason: "local-error" }` so the operator sees the real
 * failure rather than a quiet rotation to cloud.
 *
 * @param err - Whatever was thrown by the local path.
 * @returns `fallback` tells the caller whether cloud may take over; `reason`
 *   is the code to report either way.
 */
export function classifyLocalError(err: unknown): {
	fallback: boolean;
	reason: FallbackReason;
} {
	const propagate = (reason: FallbackReason) => ({ fallback: false, reason });
	const rotate = (reason: FallbackReason) => ({ fallback: true, reason });

	if (!(err instanceof Error)) {
		return propagate("local-error");
	}

	// Lower-cased once so every substring probe below is case-insensitive.
	const text = err.message.toLowerCase();
	const mentionsAny = (...needles: string[]): boolean =>
		needles.some((needle) => text.includes(needle));

	// Name-based rules take precedence over any message content.
	switch (err.name) {
		case "AbortError":
			return propagate("local-aborted-pre-completion");
		case "KvSpillUnsupportedError":
			// KV-cache spill cannot meet the latency budget on this device. This
			// is a deliberate hard-fail (packages/inference/AGENTS.md §3): the
			// engine surfaces it to the UI as a structured error, with no silent
			// rotation to cloud and no "load anyway, slowly".
			return propagate("local-error");
		default:
			break;
	}

	if (
		mentionsAny(
			"no bundled",
			"not installed in this build",
			"node-llama-cpp is not installed",
			"no local model is active",
			"dlopen",
			"missing libllama",
		)
	) {
		return rotate("local-unavailable");
	}

	if (
		mentionsAny(
			"decode: failed to find a memory slot",
			"thermal",
			"low-power",
		)
	) {
		return rotate("local-overloaded");
	}

	if (
		mentionsAny(
			"llama_decode",
			"llama_tokenize",
			"llama_sampler",
			"ggml_assert",
		)
	) {
		return rotate("local-error");
	}

	return propagate("local-error");
}
|
|
104
|
+
|
|
105
|
+
/**
 * Runtime view that exposes the model registry: a map from model-type name
 * to the handlers registered for that type. `findCloudCandidate` casts the
 * runtime to this shape so it can walk the list for a model type and skip
 * the local provider, delegating to cloud instead of recursing into local.
 *
 * Each list entry has the same shape as `CloudCandidate`
 * (`provider`, `priority`, `handler`).
 */
export type RuntimeWithModelLookup = IAgentRuntime & {
	models: Map<string, CloudCandidate[]>;
};
|
|
124
|
+
|
|
125
|
+
/**
 * A registered handler chosen as the target for a fallback request.
 */
export interface CloudCandidate {
	/** Provider id the handler is registered under. */
	provider: string;
	/** Priority recorded for the handler in the registry. */
	priority: number;
	/** The handler itself; called with the runtime and the request params. */
	handler: (
		runtime: IAgentRuntime,
		params: Record<string, JsonValue | object>,
	) => Promise<JsonValue | object>;
}
|
|
133
|
+
|
|
134
|
+
/**
 * Find the first handler registered for `modelType` whose provider id is
 * not `excludeProvider`.
 *
 * The list is scanned in stored order. The runtime is expected to keep it
 * sorted highest priority first, which makes the first non-excluded entry
 * the best remaining candidate; that ordering is not checked here.
 *
 * @param runtime - Runtime whose `models` registry is consulted.
 * @param modelType - Model type to look up; stringified before the lookup.
 * @param excludeProvider - Provider id to skip (the local provider).
 * @returns A fresh `{ provider, priority, handler }` object, or `null` when
 *   nothing is registered for the type or every entry is the excluded one.
 */
export function findCloudCandidate(
	runtime: IAgentRuntime,
	modelType: ModelTypeName | string,
	excludeProvider: string,
): CloudCandidate | null {
	const registered = (runtime as RuntimeWithModelLookup).models.get(
		String(modelType),
	);
	const match = registered?.find(
		(candidate) => candidate.provider !== excludeProvider,
	);
	if (!match) {
		return null;
	}
	// Copy only the three contract fields rather than handing out the
	// registry's own entry object.
	const { provider, priority, handler } = match;
	return { provider, priority, handler };
}
|
|
155
|
+
|
|
156
|
+
/**
 * Configuration consumed by `makeCloudFallbackHandler`.
 */
export interface CloudFallbackOptions {
	/**
	 * Provider id of the wrapped local handler (e.g. "eliza-aosp-llama").
	 * Entries registered under this id are skipped when choosing a cloud target.
	 */
	localProvider: string;
	/** Model type serviced by the wrapper (TEXT_LARGE, TEXT_SMALL, etc). */
	modelType: ModelTypeName | string;
	/**
	 * The wrapped local generate function. It resolves to `{ kind: "ok" }`
	 * when it produced text, or to `{ kind: "fallback", reason }` to hand the
	 * request over to cloud.
	 */
	localGenerate: (
		runtime: IAgentRuntime,
		params: GenerateTextParams,
	) => Promise<LocalGenerateOutcome>;
	/**
	 * Optional log sink. When omitted the wrapper uses a no-op, so nothing is
	 * logged and no logging framework is required.
	 */
	log?: (message: string, detail?: Record<string, unknown>) => void;
}
|
|
172
|
+
|
|
173
|
+
/**
 * Wrap a local generate function in a handler with the registered-handler
 * shape, suitable for `runtime.registerModel`.
 *
 * On each call the returned handler:
 *   1. Awaits `opts.localGenerate`. Anything it throws propagates unchanged.
 *   2. Returns the text directly when the outcome is `{ kind: "ok" }`.
 *   3. Otherwise logs the fallback reason and asks `findCloudCandidate` for
 *      the first handler of the same model type that is not the local
 *      provider.
 *   4. Throws an `Error` naming the fallback reason (with `cause` set when
 *      the outcome carried one) if no such handler exists.
 *   5. Forwards the original `params` to the chosen handler and returns its
 *      result, throwing if that result is not a string.
 *
 * @param opts - Local provider id, model type, local generate function and
 *   optional logger. `opts.log` is read once here; the other fields are read
 *   on every call.
 * @returns The wrapping handler.
 */
export function makeCloudFallbackHandler(
	opts: CloudFallbackOptions,
): (
	runtime: IAgentRuntime,
	params: Record<string, JsonValue | object>,
) => Promise<string> {
	const emit = opts.log ?? (() => undefined);
	// Evaluated lazily at each use so the label always reflects `opts` as it
	// is at that moment.
	const modelLabel = (): string => String(opts.modelType);

	return async (runtime, params) => {
		const outcome = await opts.localGenerate(
			runtime,
			params as unknown as GenerateTextParams,
		);
		if (outcome.kind === "ok") {
			return outcome.text;
		}

		const { reason } = outcome;
		emit(
			`[cloud-fallback] local handler returned fallback (reason=${reason})`,
			{ modelType: modelLabel(), reason },
		);

		const cloud = findCloudCandidate(
			runtime,
			opts.modelType,
			opts.localProvider,
		);
		if (!cloud) {
			const failure: Error & { cause?: unknown } = new Error(
				`[cloud-fallback] Local inference reported ${reason} and no cloud handler is registered for ${modelLabel()}. Pair Eliza Cloud or install a provider plugin (anthropic/openai) to enable fallback.`,
			);
			// Keep the local failure reachable from the error we raise.
			if (outcome.cause) {
				failure.cause = outcome.cause;
			}
			throw failure;
		}

		emit(
			`[cloud-fallback] forwarding to ${cloud.provider} @ priority ${cloud.priority}`,
			{
				modelType: modelLabel(),
				provider: cloud.provider,
				reason,
			},
		);

		const reply = await cloud.handler(runtime, params);
		if (typeof reply === "string") {
			return reply;
		}
		throw new Error(
			`[cloud-fallback] Cloud handler ${cloud.provider} returned non-string result for ${modelLabel()}.`,
		);
	};
}
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Conversation registry for the local-inference path.
|
|
3
|
+
*
|
|
4
|
+
* Today's slot allocation is purely a hash function: `deriveSlotId` maps a
|
|
5
|
+
* `promptCacheKey` (or any stable string) to `slot_id` in `[0, parallel)`.
|
|
6
|
+
* That works for one-shot calls but breaks for long agentic loops:
|
|
7
|
+
*
|
|
8
|
+
* - Two distinct conversations whose cache keys hash to the same slot
|
|
9
|
+
* evict each other's KV every turn (slot thrashing).
|
|
10
|
+
* - The current high-water mark of concurrent conversations is invisible,
|
|
11
|
+
* so `--parallel N` cannot be tuned to fit.
|
|
12
|
+
* - There is no notion of an explicit "I am still using this slot" lease,
|
|
13
|
+
* so eviction is purely best-effort.
|
|
14
|
+
*
|
|
15
|
+
* This registry keeps a per-conversation reservation. `openConversation`
|
|
16
|
+
* picks the lowest-loaded slot and pins the conversation to it; subsequent
|
|
17
|
+
* `generateInConversation` calls always land on the same slot. When the
|
|
18
|
+
* pool is full, slot reuse falls back to the same-as-before hash policy
|
|
19
|
+
* (two leases on the same slot still serialise correctly via the dispatcher's
|
|
20
|
+
* generation queue).
|
|
21
|
+
*
|
|
22
|
+
* The registry tracks the high-water mark of concurrently-open conversations
|
|
23
|
+
* so the engine can warn, or later restart llama-server with a higher
|
|
24
|
+
* --parallel, when the load outgrows the configured slot count.
|
|
25
|
+
*/
|
|
26
|
+
/**
 * Handle handed back by `openConversation`. The registry owns the slot id
 * and the handle's lifetime, so callers MUST treat the handle as opaque.
 */
export interface ConversationHandle {
	readonly conversationId: string;
	readonly modelId: string;
	/**
	 * Slot this conversation is pinned to, in `[0, parallel)`; `-1` means
	 * slot pinning is disabled (parallel <= 0). Both backends use it as the
	 * cache key:
	 *   - llama-server forwards it as `slot_id` in the request payload.
	 *   - node-llama-cpp combines it with the conversation id to derive the
	 *     session-pool key, so identical conversations share a session.
	 */
	readonly slotId: number;
	/** Wall-clock time, in ms, at which the handle was opened. */
	readonly openedAtMs: number;
	/** Wall-clock time, in ms, of the most recent touch (open or generate). */
	lastUsedMs: number;
	/** Idle TTL; once exceeded the registry MAY auto-close on its next sweep. */
	readonly ttlMs: number;
	/** Set once `closeConversation` has run; any later use is rejected. */
	closed: boolean;
}
|
|
50
|
+
/** Arguments accepted by `ConversationRegistry.open`. */
export interface OpenConversationArgs {
	conversationId: string;
	modelId: string;
	/** Number of slots on the running server (`--parallel N`). Defaults to 1. */
	parallel?: number;
	/**
	 * Idle time after which `evictIdle` may auto-close the handle. Defaults
	 * to 60 minutes: long enough for an LLM call to finish even on a slow
	 * drafter, yet short enough to recover from forgotten close calls within
	 * the long-cache window.
	 */
	ttlMs?: number;
}
|
|
63
|
+
/**
 * In-memory table of open conversation handles. The engine shares one
 * instance; every backend consults it on each generate to learn which slot
 * to pin to.
 */
export declare class ConversationRegistry {
	private readonly handles;
	/** Reference count per slot; the next open goes to the lowest-loaded slot. */
	private readonly slotLoad;
	/** Peak number of concurrently open handles; read by the engine for parallel auto-tune. */
	private highWaterMark;
	/**
	 * Look up or open a conversation handle. The call is idempotent for a
	 * given conversation id + model id, so it can be made on every turn
	 * without leaking handles. Reusing an existing handle bumps its
	 * `lastUsedMs` for LRU-style eviction tracking.
	 */
	open(args: OpenConversationArgs): ConversationHandle;
	/**
	 * Look up an open handle by conversation + model. Yields null when the
	 * conversation was never opened or has already been closed. A hit bumps
	 * `lastUsedMs`, so an LRU sweep counts reads as activity.
	 */
	get(conversationId: string, modelId: string): ConversationHandle | null;
	/**
	 * Close and drop a handle. Idempotent: closing an unknown or
	 * already-closed handle has no additional effect, so cleanup paths can
	 * call this unconditionally.
	 */
	close(conversationId: string, modelId: string): void;
	/**
	 * Sweep out handles whose `lastUsedMs` is older than their TTL. The ids
	 * of the dropped conversations are returned so callers can, for example,
	 * persist final KV state to disk. Safe to call on a timer.
	 */
	evictIdle(now?: number): string[];
	/**
	 * Snapshot of every currently open handle. The shutdown path uses it to
	 * emit one save-state request per slot.
	 */
	snapshot(): readonly ConversationHandle[];
	/** Peak concurrent open count observed since the registry was created. */
	highWater(): number;
	/** How many handles are open right now. */
	size(): number;
	/**
	 * Suggested `--parallel` slot count: the observed high-water mark of
	 * concurrently open conversations plus a small headroom (max(2, 25%)).
	 * The engine's auto-tune (J4) compares this with the running server's
	 * slot count; when this value is larger AND RAM headroom exists, it
	 * restarts llama-server with the higher value so new conversations get
	 * their own KV slots instead of thrashing.
	 *
	 * `running` is the slot count currently configured. While the high-water
	 * mark has not outgrown it, `running` itself is returned (no resize
	 * needed), letting callers test for equality without a second branch.
	 */
	recommendedParallel(running: number): number;
	/**
	 * Drop every handle and reset both the high-water mark and the slot-load
	 * bookkeeping. Test-only: the module singleton leaks state across files
	 * when the suite runs together, so call this in `beforeEach` to isolate.
	 * Not part of the runtime contract.
	 */
	__resetForTests(): void;
	/**
	 * Choose the slot with the fewest in-flight handles. Ties are broken by
	 * a deterministic hash of the conversation id, which avoids consistently
	 * loading slot 0 when N concurrent opens race.
	 */
	private pickLowestLoadedSlot;
	private compositeKey;
}
|
|
136
|
+
/**
 * The module-level singleton registry. It is read by the engine on every
 * generate and mutated by the conversation lifecycle API
 * (`openConversation`, `closeConversation`).
 */
export declare const conversationRegistry: ConversationRegistry;
|
|
142
|
+
//# sourceMappingURL=conversation-registry.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"conversation-registry.d.ts","sourceRoot":"","sources":["conversation-registry.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAIH;;;GAGG;AACH,MAAM,WAAW,kBAAkB;IAClC,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB;;;;;;OAMG;IACH,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,gDAAgD;IAChD,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,yEAAyE;IACzE,UAAU,EAAE,MAAM,CAAC;IACnB,qEAAqE;IACrE,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,8EAA8E;IAC9E,MAAM,EAAE,OAAO,CAAC;CAChB;AAED,MAAM,WAAW,oBAAoB;IACpC,cAAc,EAAE,MAAM,CAAC;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,0EAA0E;IAC1E,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;;OAKG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;CACf;AAID;;;;GAIG;AACH,qBAAa,oBAAoB;IAChC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAyC;IACjE,sEAAsE;IACtE,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAA6B;IACtD,wFAAwF;IACxF,OAAO,CAAC,aAAa,CAAK;IAE1B;;;;;OAKG;IACH,IAAI,CAAC,IAAI,EAAE,oBAAoB,GAAG,kBAAkB;IAqCpD;;;;OAIG;IACH,GAAG,CAAC,cAAc,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,kBAAkB,GAAG,IAAI;IAOvE;;;;OAIG;IACH,KAAK,CAAC,cAAc,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,IAAI;IAcpD;;;;OAIG;IACH,SAAS,CAAC,GAAG,GAAE,MAAmB,GAAG,MAAM,EAAE;IAkB7C;;;OAGG;IACH,QAAQ,IAAI,SAAS,kBAAkB,EAAE;IAIzC,yEAAyE;IACzE,SAAS,IAAI,MAAM;IAInB,wCAAwC;IACxC,IAAI,IAAI,MAAM;IAId;;;;;;;;;;;OAWG;IACH,mBAAmB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM;IAM5C;;;;;OAKG;IACH,eAAe,IAAI,IAAI;IAOvB;;;;OAIG;IACH,OAAO,CAAC,oBAAoB;IAqB5B,OAAO,CAAC,YAAY;CAGpB;AAED;;;;GAIG;AACH,eAAO,MAAM,oBAAoB,sBAA6B,CAAC"}
|
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import {
|
|
3
|
+
ConversationRegistry,
|
|
4
|
+
conversationRegistry,
|
|
5
|
+
} from "./conversation-registry";
|
|
6
|
+
|
|
7
|
+
describe("ConversationRegistry.open", () => {
	// Each case builds a private registry so slot bookkeeping never leaks
	// from one case into the next.
	const freshRegistry = () => new ConversationRegistry();

	it("returns the same handle for repeated opens of the same conversation", () => {
		const registry = freshRegistry();
		const request = {
			conversationId: "room-1",
			modelId: "eliza-1-9b",
			parallel: 4,
		};
		const first = registry.open(request);
		const second = registry.open({ ...request });
		expect(second).toBe(first);
		expect(registry.size()).toBe(1);
	});

	it("treats different model ids as distinct handles", () => {
		const registry = freshRegistry();
		const openWithModel = (modelId: string) =>
			registry.open({ conversationId: "room-1", modelId, parallel: 4 });
		const first = openWithModel("model-a");
		const second = openWithModel("model-b");
		expect(second).not.toBe(first);
		expect(registry.size()).toBe(2);
	});

	it("requires non-empty conversationId and modelId", () => {
		const registry = freshRegistry();
		const openMissingConversation = () =>
			registry.open({ conversationId: "", modelId: "m" });
		const openMissingModel = () =>
			registry.open({ conversationId: "c", modelId: "" });
		expect(openMissingConversation).toThrow();
		expect(openMissingModel).toThrow();
	});

	it("pins the handle to slot 0 when parallel <= 1", () => {
		const registry = freshRegistry();
		const { slotId } = registry.open({
			conversationId: "x",
			modelId: "m",
			parallel: 1,
		});
		expect(slotId).toBe(0);
	});

	it("spreads concurrent opens across slots, lowest-loaded first", () => {
		const registry = freshRegistry();
		// Four conversations on a four-slot pool must occupy four distinct slots.
		const slotIds = [0, 1, 2, 3].map(
			(i) =>
				registry.open({
					conversationId: `room-${i}`,
					modelId: "m",
					parallel: 4,
				}).slotId,
		);
		expect(new Set<number>(slotIds).size).toBe(4);
	});
});
|
|
70
|
+
|
|
71
|
+
describe("ConversationRegistry.close", () => {
	it("frees the slot and is idempotent", () => {
		const registry = new ConversationRegistry();
		const opened = registry.open({
			conversationId: "x",
			modelId: "m",
			parallel: 4,
		});
		expect(opened.closed).toBe(false);

		// The second close is the idempotency probe: it must not throw.
		for (let attempt = 0; attempt < 2; attempt += 1) {
			registry.close("x", "m");
		}
		expect(registry.get("x", "m")).toBeNull();
	});

	it("frees a slot for reuse on next open", () => {
		const registry = new ConversationRegistry();
		const openOnTwoSlots = (conversationId: string) =>
			registry.open({ conversationId, modelId: "m", parallel: 2 });

		const first = openOnTwoSlots("a");
		const second = openOnTwoSlots("b");
		expect(first.slotId).not.toBe(second.slotId);

		registry.close("a", "m");

		// With both slots taken and then "a" released, the newcomer is
		// expected to land on the slot "a" just vacated.
		const third = openOnTwoSlots("c");
		expect(third.slotId).toBe(first.slotId);
	});
});
|
|
108
|
+
|
|
109
|
+
describe("ConversationRegistry.get", () => {
	it("returns null for unknown or closed handles", () => {
		const registry = new ConversationRegistry();

		// Never opened.
		const unknown = registry.get("nope", "m");
		expect(unknown).toBeNull();

		// Opened, then closed again.
		registry.open({ conversationId: "x", modelId: "m", parallel: 4 });
		registry.close("x", "m");
		const afterClose = registry.get("x", "m");
		expect(afterClose).toBeNull();
	});
});
|
|
118
|
+
|
|
119
|
+
describe("ConversationRegistry.evictIdle", () => {
	// Opens conversation "x" on a fresh registry with the requested TTL.
	const registryWithTtl = (ttlMs: number) => {
		const registry = new ConversationRegistry();
		registry.open({
			conversationId: "x",
			modelId: "m",
			parallel: 4,
			ttlMs,
		});
		return registry;
	};

	it("drops handles whose ttl has elapsed", () => {
		const registry = registryWithTtl(1_000);
		expect(registry.size()).toBe(1);
		// Sweep as if 5 s had passed, well beyond the 1 s TTL.
		const dropped = registry.evictIdle(Date.now() + 5_000);
		expect(dropped).toEqual(["x"]);
		expect(registry.size()).toBe(0);
	});

	it("keeps handles whose ttl has NOT elapsed", () => {
		const registry = registryWithTtl(60_000);
		// Sweep as if 10 s had passed, still inside the 60 s TTL.
		const dropped = registry.evictIdle(Date.now() + 10_000);
		expect(dropped).toEqual([]);
		expect(registry.size()).toBe(1);
	});
});
|
|
147
|
+
|
|
148
|
+
describe("ConversationRegistry.highWater", () => {
	it("tracks the largest concurrent open count", () => {
		const registry = new ConversationRegistry();
		expect(registry.highWater()).toBe(0);

		for (const conversationId of ["a", "b", "c"]) {
			registry.open({ conversationId, modelId: "m", parallel: 8 });
		}
		expect(registry.highWater()).toBe(3);

		for (const conversationId of ["a", "b"]) {
			registry.close(conversationId, "m");
		}
		// The mark is a lifetime maximum, so closing handles must NOT lower it.
		expect(registry.highWater()).toBe(3);
	});
});
|
|
162
|
+
|
|
163
|
+
// recommendedParallel(running): per the expectations below, the result is the
// larger of `running` and highWater + max(2, ceil(highWater * 0.25)).
describe("ConversationRegistry.recommendedParallel (--parallel auto-resize decision)", () => {
  // Opens `count` conversations named c-0 … c-(count-1) on model "m" and
  // returns the values produced by registry.open, in order.
  const openMany = (registry, count, parallel) =>
    Array.from({ length: count }, (_, i) =>
      registry.open({ conversationId: `c-${i}`, modelId: "m", parallel }),
    );

  it("returns the running count when the high-water mark hasn't outgrown it", () => {
    const registry = new ConversationRegistry();
    // 2 concurrent, headroom max(2, ceil(2*0.25)=1) = 2 → desired 4.
    registry.open({ conversationId: "a", modelId: "m", parallel: 4 });
    registry.open({ conversationId: "b", modelId: "m", parallel: 4 });
    expect(registry.highWater()).toBe(2);
    expect(registry.recommendedParallel(4)).toBe(4); // 4 already covers it
    expect(registry.recommendedParallel(8)).toBe(8); // larger running wins
  });

  it("recommends high-water + 25%-headroom when it exceeds the running count", () => {
    const registry = new ConversationRegistry();
    openMany(registry, 20, 4);
    expect(registry.highWater()).toBe(20);
    // 20 + max(2, ceil(20*0.25)=5) = 25.
    expect(registry.recommendedParallel(4)).toBe(25);
  });

  it("headroom floors at 2 (small high-water marks still get a buffer)", () => {
    // Floor engaged: ceil(3*0.25) = 1 is below the floor → headroom 2 → desired 5.
    // Without this case the test passes even if the floor is removed, because
    // at a high-water mark of 5 the 25% term already rounds up to 2.
    const small = new ConversationRegistry();
    openMany(small, 3, 2);
    expect(small.highWater()).toBe(3);
    expect(small.recommendedParallel(2)).toBe(5);

    // Boundary where the floor and the 25% term agree:
    // ceil(5*0.25) = 2 → headroom 2 → desired 7.
    const registry = new ConversationRegistry();
    openMany(registry, 5, 2);
    expect(registry.highWater()).toBe(5);
    expect(registry.recommendedParallel(2)).toBe(7);
  });

  it("is monotonic: closing conversations does not shrink the recommendation", () => {
    const registry = new ConversationRegistry();
    const handles = openMany(registry, 10, 4);
    expect(registry.recommendedParallel(4)).toBe(13); // 10 + ceil(10*.25)=3
    for (const h of handles) {
      registry.close(h.conversationId, h.modelId);
    }
    expect(registry.size()).toBe(0);
    expect(registry.recommendedParallel(4)).toBe(13); // unchanged
  });
});
|
|
205
|
+
|
|
206
|
+
// __resetForTests(): test-only hook; the expectations below pin that it empties
// the registry, zeroes the high-water mark and marks dropped handles closed.
describe("ConversationRegistry.__resetForTests", () => {
  it("drops every handle and resets the high-water mark", () => {
    const registry = new ConversationRegistry();
    const a = registry.open({ conversationId: "a", modelId: "m", parallel: 4 });
    registry.open({ conversationId: "b", modelId: "m", parallel: 4 });
    expect(registry.size()).toBe(2);
    expect(registry.highWater()).toBe(2);

    registry.__resetForTests();

    expect(registry.size()).toBe(0);
    expect(registry.highWater()).toBe(0);
    expect(registry.recommendedParallel(4)).toBe(4);
    // The dropped handle is marked closed (further use is rejected by the engine).
    expect(a.closed).toBe(true);
    // A slot freed by reset is reusable from slot 0 again.
    const handle = registry.open({
      conversationId: "c",
      modelId: "m",
      parallel: 4,
    });
    expect(handle.slotId).toBe(0);
  });

  it("isolates the module singleton across test files", () => {
    conversationRegistry.__resetForTests();
    try {
      conversationRegistry.open({ conversationId: "leak", modelId: "m" });
      expect(conversationRegistry.size()).toBe(1);
    } finally {
      // Reset even when the assertion above throws, so a failure here cannot
      // leave the "leak" conversation behind in the shared singleton.
      conversationRegistry.__resetForTests();
    }
    expect(conversationRegistry.size()).toBe(0);
  });
});
|