@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +82 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/actions/transcription-control.d.ts +29 -0
- package/src/actions/transcription-control.d.ts.map +1 -0
- package/src/actions/transcription-control.test.ts +100 -0
- package/src/actions/transcription-control.ts +127 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +8 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +62 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1082 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +205 -0
- package/src/routes/local-inference-asr-route.ts +163 -0
- package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
- package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
- package/src/routes/local-inference-asr-transcribe.ts +97 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +485 -0
- package/src/routes/local-inference-compat-routes.ts +808 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/transcript-audio-store.d.ts +15 -0
- package/src/routes/transcript-audio-store.d.ts.map +1 -0
- package/src/routes/transcript-audio-store.ts +27 -0
- package/src/routes/transcripts-routes.d.ts +36 -0
- package/src/routes/transcripts-routes.d.ts.map +1 -0
- package/src/routes/transcripts-routes.test.ts +144 -0
- package/src/routes/transcripts-routes.ts +159 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1448 -0
- package/src/runtime/index.d.ts +15 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +33 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bionic-host-loader.d.ts +46 -0
- package/src/services/bionic-host-loader.d.ts.map +1 -0
- package/src/services/bionic-host-loader.test.ts +133 -0
- package/src/services/bionic-host-loader.ts +180 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +238 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +747 -0
- package/src/services/downloader.ts +925 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +540 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1909 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.d.ts +56 -0
- package/src/services/gpu-detect.d.ts.map +1 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +231 -0
- package/src/services/hardware.ts +410 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +277 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +29 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +211 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +689 -0
- package/src/services/manifest/schema.d.ts +713 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +653 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +567 -0
- package/src/services/memory-arbiter.d.ts +318 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +925 -0
- package/src/services/memory-monitor.d.ts +122 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +297 -0
- package/src/services/memory-pressure.d.ts +130 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +414 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +671 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +407 -0
- package/src/services/routing-policy.d.ts +69 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.test.ts +164 -0
- package/src/services/routing-policy.ts +297 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +17 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/system-memory.d.ts +33 -0
- package/src/services/system-memory.d.ts.map +1 -0
- package/src/services/system-memory.test.ts +47 -0
- package/src/services/system-memory.ts +67 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/asr-timed.real.test.ts +141 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +131 -0
- package/src/services/voice/embedding.ts +243 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +759 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2302 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +674 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +728 -0
- package/src/services/voice/ffi-bindings.ts +3225 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/real-audio-decode.test.ts +148 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.test.ts +129 -0
- package/src/services/voice/ring-buffer.ts +123 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/transcript-knowledge.d.ts +37 -0
- package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/src/services/voice/transcript-knowledge.test.ts +68 -0
- package/src/services/voice/transcript-knowledge.ts +75 -0
- package/src/services/voice/transcript-service.d.ts +41 -0
- package/src/services/voice/transcript-service.d.ts.map +1 -0
- package/src/services/voice/transcript-service.test.ts +137 -0
- package/src/services/voice/transcript-service.ts +141 -0
- package/src/services/voice/transcript-store.d.ts +53 -0
- package/src/services/voice/transcript-store.d.ts.map +1 -0
- package/src/services/voice/transcript-store.test.ts +153 -0
- package/src/services/voice/transcript-store.ts +132 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +418 -0
- package/src/services/voice/voice-budget.ts +635 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cross-cut resource sharing between the text + voice surfaces of a
|
|
3
|
+
* single Eliza-1 bundle.
|
|
4
|
+
*
|
|
5
|
+
* Per `packages/inference/AGENTS.md` §4 ("shared KV cache scheduling,
|
|
6
|
+
* not shared KV memory" + "one process, one llama.cpp build, one GGML
|
|
7
|
+
* pin"), text and voice MUST share:
|
|
8
|
+
* - the tokenizer (Eliza-1/OmniVoice share a vocabulary in this lineage),
|
|
9
|
+
* - the mmap regions for weights (deduplicated by absolute path),
|
|
10
|
+
* - the kernel set (same shipped llama.cpp library after fusion),
|
|
11
|
+
* - the scheduler queue (one queue, prioritised),
|
|
12
|
+
* - the native MTP draft path (always wired for Eliza-1).
|
|
13
|
+
*
|
|
14
|
+
* What they do NOT share:
|
|
15
|
+
* - KV cache memory (different layer counts, different head configs,
|
|
16
|
+
* different quantizations — separate caches, shared scheduler).
|
|
17
|
+
*
|
|
18
|
+
* This module owns reference counts on each shared resource and is the
|
|
19
|
+
* single arbiter of when a voice-only region can be released. It does
|
|
20
|
+
* NOT do any I/O itself — the actual mmap, madvise, or full model-unload
|
|
21
|
+
* behavior lives behind the `MmapRegionHandle` interface so platform
|
|
22
|
+
* bindings can choose the right memory policy.
|
|
23
|
+
*/
|
|
24
|
+
/** Minimal structural logger — keeps this module free of upstream deps. */
|
|
25
|
+
interface Logger {
|
|
26
|
+
debug?(message: string): void;
|
|
27
|
+
warn?(message: string): void;
|
|
28
|
+
info?(message: string): void;
|
|
29
|
+
}
|
|
30
|
+
/**
|
|
31
|
+
* The model roles that can be resident at once on the local-inference
|
|
32
|
+
* path. The `MemoryMonitor` evicts them in *ascending priority* under RAM
|
|
33
|
+
* pressure (lowest first): low-cost voice auxiliaries are cheapest to drop,
|
|
34
|
+
* the text target is the last thing to go. Voice TTS/ASR weights are evicted
|
|
35
|
+
* via `MmapRegionHandle.evictPages()`; the embedding model is unloaded by
|
|
36
|
+
* its owner.
|
|
37
|
+
*/
|
|
38
|
+
export type ResidentModelRole = "drafter" | "emotion" | "speaker-id" | "vision" | "embedding" | "vad" | "asr" | "tts" | "text-target";
|
|
39
|
+
/**
|
|
40
|
+
* Eviction priority by role — lower evicts first. Matches the brief's
|
|
41
|
+
* `emotion < speaker-id < vision/mmproj < embedding < vad < ASR <
|
|
42
|
+
* TTS < text-target`. The cold-3 set (`emotion`, `speaker-id`) is cheap to
|
|
43
|
+
* load on demand, so evicting them is the first reclamation step under
|
|
44
|
+
* sustained pressure. See `.swarm/research/R9-memory.md` §4.1.
|
|
45
|
+
*/
|
|
46
|
+
export declare const RESIDENT_ROLE_PRIORITY: Readonly<Record<ResidentModelRole, number>>;
|
|
47
|
+
/**
|
|
48
|
+
* An evictable resident model role. The registry walks these in ascending
|
|
49
|
+
* `evictionPriority` under memory pressure and calls `evict()` until enough
|
|
50
|
+
* RAM has been reclaimed. `evict()` MUST be idempotent (a no-op when already
|
|
51
|
+
* evicted) and the role MUST re-load lazily on next use — the monitor only
|
|
52
|
+
* frees memory, it never re-loads.
|
|
53
|
+
*/
|
|
54
|
+
export interface EvictableModelRole extends RefCountedResource {
|
|
55
|
+
readonly role: ResidentModelRole;
|
|
56
|
+
/** Lower evicts first. Defaults to `RESIDENT_ROLE_PRIORITY[role]`. */
|
|
57
|
+
readonly evictionPriority: number;
|
|
58
|
+
/** True while the underlying weights/pages are still resident. */
|
|
59
|
+
isResident(): boolean;
|
|
60
|
+
/** Drop the resident weights/pages. Idempotent; re-loads lazily on demand. */
|
|
61
|
+
evict(): Promise<void>;
|
|
62
|
+
/** Best-effort estimate of RAM (MB) reclaimed by `evict()`. 0 when unknown. */
|
|
63
|
+
estimatedResidentMb(): number;
|
|
64
|
+
}
|
|
65
|
+
/**
|
|
66
|
+
* Build an `EvictableModelRole` from a role + an `evict` callback. `release()`
|
|
67
|
+
* defaults to a no-op (the registry's refcount, not `release`, gates eviction
|
|
68
|
+
* for these); pass one if the role owns disposable state. `estimatedMb` lets
|
|
69
|
+
* the monitor know roughly how much it will reclaim — pass 0 when unknown.
|
|
70
|
+
*/
|
|
71
|
+
export declare function createEvictableModelRole(args: {
|
|
72
|
+
id?: string;
|
|
73
|
+
role: ResidentModelRole;
|
|
74
|
+
evictionPriority?: number;
|
|
75
|
+
estimatedMb?: number;
|
|
76
|
+
isResident: () => boolean;
|
|
77
|
+
evict: () => Promise<void>;
|
|
78
|
+
release?: () => Promise<void>;
|
|
79
|
+
}): EvictableModelRole;
|
|
80
|
+
/**
|
|
81
|
+
* Anything ref-counted by the registry implements this. The caller of
|
|
82
|
+
* `release()` MUST guarantee that no further reads happen on the
|
|
83
|
+
* underlying resource — for mmap regions that means no kernel call has
|
|
84
|
+
* a pointer into the freed range.
|
|
85
|
+
*/
|
|
86
|
+
export interface RefCountedResource {
|
|
87
|
+
readonly id: string;
|
|
88
|
+
/** Released for real when the last ref drops. Idempotent. */
|
|
89
|
+
release(): Promise<void>;
|
|
90
|
+
}
|
|
91
|
+
/**
|
|
92
|
+
* mmap region handle. The fused omnivoice/llama.cpp build owns the real
|
|
93
|
+
* mmap call (it happens inside the FFI) — this interface is the JS-side
|
|
94
|
+
* proxy for it, so the lifecycle code can request page eviction without
|
|
95
|
+
* binding to a specific backend.
|
|
96
|
+
*/
|
|
97
|
+
export interface MmapRegionHandle extends RefCountedResource {
|
|
98
|
+
/** Absolute path of the file backing the mmap region. */
|
|
99
|
+
readonly path: string;
|
|
100
|
+
/** Byte size of the mapped region. */
|
|
101
|
+
readonly sizeBytes: number;
|
|
102
|
+
/**
|
|
103
|
+
* Release memory pressure for this region. Backends may implement this
|
|
104
|
+
* as a page hint or as a full voice-runtime unload. Common mappings:
|
|
105
|
+
* - POSIX (Linux/Android/macOS-bg): `madvise(addr, len, MADV_DONTNEED)`
|
|
106
|
+
* - macOS (foreground / iOS): `madvise(addr, len, MADV_FREE_REUSABLE)`
|
|
107
|
+
* - Windows: `VirtualUnlock` + `OfferVirtualMemory`
|
|
108
|
+
*
|
|
109
|
+
* The lifecycle test mocks this to assert the call happened.
|
|
110
|
+
*/
|
|
111
|
+
evictPages(): Promise<void>;
|
|
112
|
+
}
|
|
113
|
+
/** Minimal tokenizer surface text + voice both consume. */
|
|
114
|
+
export interface SharedTokenizer extends RefCountedResource {
|
|
115
|
+
readonly vocabSize: number;
|
|
116
|
+
}
|
|
117
|
+
/**
|
|
118
|
+
* Kernel set descriptor. The actual kernels are inside the fused
|
|
119
|
+
* llama.cpp build; this is the metadata the runtime reads at startup
|
|
120
|
+
* (AGENTS.md §3 #5: "the runtime MUST log the kernel set on startup").
|
|
121
|
+
*/
|
|
122
|
+
export interface KernelSet extends RefCountedResource {
|
|
123
|
+
readonly kernels: ReadonlyArray<string>;
|
|
124
|
+
}
|
|
125
|
+
/** Scheduler graph slot. One per active engine, refcounted by surface. */
|
|
126
|
+
export interface SchedulerSlot extends RefCountedResource {
|
|
127
|
+
/** Surfaces (text/voice) currently holding a ref. */
|
|
128
|
+
surfaces(): ReadonlyArray<"text" | "voice">;
|
|
129
|
+
}
|
|
130
|
+
/** Native MTP draft state is shared between text-only and voice modes. */
|
|
131
|
+
export interface MtpDraftHandle extends RefCountedResource {
|
|
132
|
+
readonly modelId: string;
|
|
133
|
+
}
|
|
134
|
+
export declare function createMtpDraftHandle(args: {
|
|
135
|
+
modelId: string;
|
|
136
|
+
}): MtpDraftHandle;
|
|
137
|
+
/**
|
|
138
|
+
* Owns the shared resources for one engine. Voice + text both `acquire`
|
|
139
|
+
* and `release` against the same registry; the registry only releases
|
|
140
|
+
* the underlying resource when refcount hits zero.
|
|
141
|
+
*
|
|
142
|
+
* Thread-safety: all methods run on the single Node event loop; no
|
|
143
|
+
* locks needed. Promises returned from `release()` MUST be awaited so
|
|
144
|
+
* the lifecycle state machine can observe completion.
|
|
145
|
+
*/
|
|
146
|
+
export declare class SharedResourceRegistry {
|
|
147
|
+
private readonly entries;
|
|
148
|
+
private readonly log?;
|
|
149
|
+
constructor(opts?: {
|
|
150
|
+
logger?: Logger;
|
|
151
|
+
});
|
|
152
|
+
/**
|
|
153
|
+
* Register a resource if absent, increment refcount otherwise. Returns
|
|
154
|
+
* the canonical instance — callers MUST use the returned value, not the
|
|
155
|
+
* one passed in, so a second registration with the same id resolves to
|
|
156
|
+
* the original (deduplication by id).
|
|
157
|
+
*/
|
|
158
|
+
acquire<T extends RefCountedResource>(resource: T): T;
|
|
159
|
+
/**
|
|
160
|
+
* Decrement refcount; release the resource when it hits zero. Throws
|
|
161
|
+
* on unknown id — silent no-ops would hide leaks.
|
|
162
|
+
*/
|
|
163
|
+
release(id: string): Promise<void>;
|
|
164
|
+
/** Diagnostic: current refcount, or 0 when not present. */
|
|
165
|
+
refCount(id: string): number;
|
|
166
|
+
/** Diagnostic: snapshot of currently-tracked resource ids. */
|
|
167
|
+
ids(): ReadonlyArray<string>;
|
|
168
|
+
/** Total tracked resources. */
|
|
169
|
+
size(): number;
|
|
170
|
+
/**
|
|
171
|
+
* Currently-resident evictable model roles, ascending by eviction
|
|
172
|
+
* priority (cheapest-to-evict first). Used by `MemoryMonitor` to walk
|
|
173
|
+
* roles under RAM pressure. Non-resident roles are filtered out — there's
|
|
174
|
+
* nothing to reclaim.
|
|
175
|
+
*/
|
|
176
|
+
evictableRoles(): ReadonlyArray<EvictableModelRole>;
|
|
177
|
+
/**
|
|
178
|
+
* Evict the lowest-priority resident role and return its `id`, `role` and `estimatedMb`, or `null`
|
|
179
|
+
* when nothing is evictable. Observable: emits an `info` log line so the
|
|
180
|
+
* eviction is visible in the dev console. The role re-loads lazily on
|
|
181
|
+
* next use — this only frees memory.
|
|
182
|
+
*/
|
|
183
|
+
evictLowestPriorityRole(): Promise<{
|
|
184
|
+
id: string;
|
|
185
|
+
role: ResidentModelRole;
|
|
186
|
+
estimatedMb: number;
|
|
187
|
+
} | null>;
|
|
188
|
+
}
|
|
189
|
+
export {};
|
|
190
|
+
//# sourceMappingURL=shared-resources.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"shared-resources.d.ts","sourceRoot":"","sources":["shared-resources.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAEH,2EAA2E;AAC3E,UAAU,MAAM;IACf,KAAK,CAAC,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,IAAI,CAAC,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,IAAI,CAAC,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;CAC7B;AAED;;;;;;;GAOG;AACH,MAAM,MAAM,iBAAiB,GAC1B,SAAS,GACT,SAAS,GACT,YAAY,GACZ,QAAQ,GACR,WAAW,GACX,KAAK,GACL,KAAK,GACL,KAAK,GACL,aAAa,CAAC;AAEjB;;;;;;GAMG;AACH,eAAO,MAAM,sBAAsB,EAAE,QAAQ,CAC5C,MAAM,CAAC,iBAAiB,EAAE,MAAM,CAAC,CAWjC,CAAC;AAEF;;;;;;GAMG;AACH,MAAM,WAAW,kBAAmB,SAAQ,kBAAkB;IAC7D,QAAQ,CAAC,IAAI,EAAE,iBAAiB,CAAC;IACjC,sEAAsE;IACtE,QAAQ,CAAC,gBAAgB,EAAE,MAAM,CAAC;IAClC,kEAAkE;IAClE,UAAU,IAAI,OAAO,CAAC;IACtB,8EAA8E;IAC9E,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IACvB,+EAA+E;IAC/E,mBAAmB,IAAI,MAAM,CAAC;CAC9B;AAeD;;;;;GAKG;AACH,wBAAgB,wBAAwB,CAAC,IAAI,EAAE;IAC9C,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,iBAAiB,CAAC;IACxB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,OAAO,CAAC;IAC1B,KAAK,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;IAC3B,OAAO,CAAC,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;CAC9B,GAAG,kBAAkB,CAkBrB;AAED;;;;;GAKG;AACH,MAAM,WAAW,kBAAkB;IAClC,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,6DAA6D;IAC7D,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CACzB;AAED;;;;;GAKG;AACH,MAAM,WAAW,gBAAiB,SAAQ,kBAAkB;IAC3D,yDAAyD;IACzD,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,sCAAsC;IACtC,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B;;;;;;;;OAQG;IACH,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC5B;AAED,2DAA2D;AAC3D,MAAM,WAAW,eAAgB,SAAQ,kBAAkB;IAC1D,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC3B;AAED;;;;GAIG;AACH,MAAM,WAAW,SAAU,SAAQ,kBAAkB;IACpD,QAAQ,CAAC,OAAO,EAAE,aAAa,CAAC,MAAM,CAAC,CAAC;CACxC;AAED,0EAA0E;AAC1E,MAAM,WAAW,aAAc,SAAQ,kBAAkB;IACxD,oDAAoD;IACpD,QAAQ,IAAI,aAAa,CAAC,MAAM,GAAG,OAAO,CAAC,CAAC;CAC5C;AAED,0EAA0E;AAC1E,MAAM,WAAW,cAAe,SAAQ,kBAAkB;IACzD,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;CACzB;AAED,wBAAgB,oBAAoB,CAAC,IAAI,EAAE;IAC1C,OAAO,EAAE,MAAM,CAAC;CAChB,GAAG,cAAc,CAQjB;AAOD;;;;;;;;GAQG;
AACH,qBAAa,sBAAsB;IAClC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAGpB;IACJ,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAS;gBAElB,IAAI,GAAE;QAAE,MAAM,CAAC,EAAE,MAAM,CAAA;KAAO;IAI1C;;;;;OAKG;IACH,OAAO,CAAC,CAAC,SAAS,kBAAkB,EAAE,QAAQ,EAAE,CAAC,GAAG,CAAC;IAUrD;;;OAGG;IACG,OAAO,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAcxC,2DAA2D;IAC3D,QAAQ,CAAC,EAAE,EAAE,MAAM,GAAG,MAAM;IAI5B,8DAA8D;IAC9D,GAAG,IAAI,aAAa,CAAC,MAAM,CAAC;IAI5B,+BAA+B;IAC/B,IAAI,IAAI,MAAM;IAId;;;;;OAKG;IACH,cAAc,IAAI,aAAa,CAAC,kBAAkB,CAAC;IAUnD;;;;;OAKG;IACG,uBAAuB,IAAI,OAAO,CAAC;QACxC,EAAE,EAAE,MAAM,CAAC;QACX,IAAI,EAAE,iBAAiB,CAAC;QACxB,WAAW,EAAE,MAAM,CAAC;KACpB,GAAG,IAAI,CAAC;CAUT"}
|
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cross-cut resource sharing between the text + voice surfaces of a
|
|
3
|
+
* single Eliza-1 bundle.
|
|
4
|
+
*
|
|
5
|
+
* Per `packages/inference/AGENTS.md` §4 ("shared KV cache scheduling,
|
|
6
|
+
* not shared KV memory" + "one process, one llama.cpp build, one GGML
|
|
7
|
+
* pin"), text and voice MUST share:
|
|
8
|
+
* - the tokenizer (Eliza-1/OmniVoice share a vocabulary in this lineage),
|
|
9
|
+
* - the mmap regions for weights (deduplicated by absolute path),
|
|
10
|
+
* - the kernel set (same shipped llama.cpp library after fusion),
|
|
11
|
+
* - the scheduler queue (one queue, prioritised),
|
|
12
|
+
* - the native MTP draft path (always wired for Eliza-1).
|
|
13
|
+
*
|
|
14
|
+
* What they do NOT share:
|
|
15
|
+
* - KV cache memory (different layer counts, different head configs,
|
|
16
|
+
* different quantizations — separate caches, shared scheduler).
|
|
17
|
+
*
|
|
18
|
+
* This module owns reference counts on each shared resource and is the
|
|
19
|
+
* single arbiter of when a voice-only region can be released. It does
|
|
20
|
+
* NOT do any I/O itself — the actual mmap, madvise, or full model-unload
|
|
21
|
+
* behavior lives behind the `MmapRegionHandle` interface so platform
|
|
22
|
+
* bindings can choose the right memory policy.
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
/** Minimal structural logger — keeps this module free of upstream deps. */
|
|
26
|
+
interface Logger {
|
|
27
|
+
debug?(message: string): void;
|
|
28
|
+
warn?(message: string): void;
|
|
29
|
+
info?(message: string): void;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* The model roles that can be resident at once on the local-inference
|
|
34
|
+
* path. The `MemoryMonitor` evicts them in *ascending priority* under RAM
|
|
35
|
+
* pressure (lowest first): low-cost voice auxiliaries are cheapest to drop,
|
|
36
|
+
* the text target is the last thing to go. Voice TTS/ASR weights are evicted
|
|
37
|
+
* via `MmapRegionHandle.evictPages()`; the embedding model is unloaded by
|
|
38
|
+
* its owner.
|
|
39
|
+
*/
|
|
40
|
+
export type ResidentModelRole =
|
|
41
|
+
| "drafter"
|
|
42
|
+
| "emotion"
|
|
43
|
+
| "speaker-id"
|
|
44
|
+
| "vision"
|
|
45
|
+
| "embedding"
|
|
46
|
+
| "vad"
|
|
47
|
+
| "asr"
|
|
48
|
+
| "tts"
|
|
49
|
+
| "text-target";
|
|
50
|
+
|
|
51
|
+
/**
 * Default eviction priority for every resident model role. A smaller number
 * means the role is evicted earlier.
 *
 * The table follows the brief's ordering
 * `emotion < speaker-id < vision/mmproj < embedding < vad < ASR < TTS <
 * text-target`; `drafter` carries the smallest value of all, so it sorts
 * ahead of every role named in that ordering. The cold-3 set (`emotion`,
 * `speaker-id`) is cheap to load on demand, which is why it is reclaimed
 * early under sustained pressure. See `.swarm/research/R9-memory.md` §4.1.
 */
export const RESIDENT_ROLE_PRIORITY: Readonly<Record<ResidentModelRole, number>> = {
	"drafter": 10,
	"emotion": 15,
	"speaker-id": 18,
	"vision": 20,
	"embedding": 25,
	"vad": 35,
	"asr": 40,
	"tts": 50,
	"text-target": 100,
};
|
|
71
|
+
|
|
72
|
+
/**
|
|
73
|
+
* An evictable resident model role. The registry walks these in ascending
|
|
74
|
+
* `evictionPriority` under memory pressure and calls `evict()` until enough
|
|
75
|
+
* RAM has been reclaimed. `evict()` MUST be idempotent (a no-op when already
|
|
76
|
+
* evicted) and the role MUST re-load lazily on next use — the monitor only
|
|
77
|
+
* frees memory, it never re-loads.
|
|
78
|
+
*/
|
|
79
|
+
export interface EvictableModelRole extends RefCountedResource {
|
|
80
|
+
readonly role: ResidentModelRole;
|
|
81
|
+
/** Lower evicts first. Defaults to `RESIDENT_ROLE_PRIORITY[role]`. */
|
|
82
|
+
readonly evictionPriority: number;
|
|
83
|
+
/** True while the underlying weights/pages are still resident. */
|
|
84
|
+
isResident(): boolean;
|
|
85
|
+
/** Drop the resident weights/pages. Idempotent; re-loads lazily on demand. */
|
|
86
|
+
evict(): Promise<void>;
|
|
87
|
+
/** Best-effort estimate of RAM (MB) reclaimed by `evict()`. 0 when unknown. */
|
|
88
|
+
estimatedResidentMb(): number;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
/**
 * Structural type guard for `EvictableModelRole`.
 *
 * A resource qualifies when it carries a string `role`, a numeric
 * `evictionPriority`, and function-valued `isResident`, `evict` and
 * `estimatedResidentMb` members. Only the runtime type of each member is
 * inspected; none of the functions is invoked.
 */
function isEvictableModelRole(
	value: RefCountedResource,
): value is EvictableModelRole {
	const probe = value as Partial<EvictableModelRole>;
	if (typeof probe.role !== "string") return false;
	if (typeof probe.evictionPriority !== "number") return false;
	if (typeof probe.isResident !== "function") return false;
	if (typeof probe.evict !== "function") return false;
	return typeof probe.estimatedResidentMb === "function";
}
|
|
103
|
+
|
|
104
|
+
/**
 * Assemble an `EvictableModelRole` around caller-supplied callbacks.
 *
 * - `id` falls back to `model-role:<role>` when omitted.
 * - `evictionPriority` falls back to `RESIDENT_ROLE_PRIORITY[role]`.
 * - `estimatedMb` falls back to 0 (unknown); the returned
 *   `estimatedResidentMb()` reports it only while `isResident()` is true and
 *   reports 0 otherwise.
 * - `evict()` is a no-op when `isResident()` is already false, otherwise it
 *   awaits the supplied `evict` callback.
 * - `release()` awaits the optional `release` callback and does nothing when
 *   none was given; pass one if the role owns disposable state.
 */
export function createEvictableModelRole(args: {
	id?: string;
	role: ResidentModelRole;
	evictionPriority?: number;
	estimatedMb?: number;
	isResident: () => boolean;
	evict: () => Promise<void>;
	release?: () => Promise<void>;
}): EvictableModelRole {
	const resourceId = args.id ?? `model-role:${args.role}`;
	const evictionPriority =
		args.evictionPriority ?? RESIDENT_ROLE_PRIORITY[args.role];
	const reclaimableMb = args.estimatedMb ?? 0;

	const handle: EvictableModelRole = {
		id: resourceId,
		role: args.role,
		evictionPriority,
		isResident: args.isResident,
		estimatedResidentMb() {
			// Nothing is reclaimable once the weights are gone.
			if (!args.isResident()) return 0;
			return reclaimableMb;
		},
		evict: async (): Promise<void> => {
			if (args.isResident()) {
				await args.evict();
			}
		},
		release: async (): Promise<void> => {
			await args.release?.();
		},
	};
	return handle;
}
|
|
137
|
+
|
|
138
|
+
/**
|
|
139
|
+
* Anything ref-counted by the registry implements this. The caller of
|
|
140
|
+
* `release()` MUST guarantee that no further reads happen on the
|
|
141
|
+
* underlying resource — for mmap regions that means no kernel call has
|
|
142
|
+
* a pointer into the freed range.
|
|
143
|
+
*/
|
|
144
|
+
export interface RefCountedResource {
|
|
145
|
+
readonly id: string;
|
|
146
|
+
/** Released for real when the last ref drops. Idempotent. */
|
|
147
|
+
release(): Promise<void>;
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
/**
|
|
151
|
+
* mmap region handle. The fused omnivoice/llama.cpp build owns the real
|
|
152
|
+
* mmap call (it happens inside the FFI) — this interface is the JS-side
|
|
153
|
+
* proxy for it, so the lifecycle code can request page eviction without
|
|
154
|
+
* binding to a specific backend.
|
|
155
|
+
*/
|
|
156
|
+
export interface MmapRegionHandle extends RefCountedResource {
|
|
157
|
+
/** Absolute path of the file backing the mmap region. */
|
|
158
|
+
readonly path: string;
|
|
159
|
+
/** Byte size of the mapped region. */
|
|
160
|
+
readonly sizeBytes: number;
|
|
161
|
+
/**
|
|
162
|
+
* Release memory pressure for this region. Backends may implement this
|
|
163
|
+
* as a page hint or as a full voice-runtime unload. Common mappings:
|
|
164
|
+
* - POSIX (Linux/Android/macOS-bg): `madvise(addr, len, MADV_DONTNEED)`
|
|
165
|
+
* - macOS (foreground / iOS): `madvise(addr, len, MADV_FREE_REUSABLE)`
|
|
166
|
+
* - Windows: `VirtualUnlock` + `OfferVirtualMemory`
|
|
167
|
+
*
|
|
168
|
+
* The lifecycle test mocks this to assert the call happened.
|
|
169
|
+
*/
|
|
170
|
+
evictPages(): Promise<void>;
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
/** Minimal tokenizer surface text + voice both consume. */
|
|
174
|
+
export interface SharedTokenizer extends RefCountedResource {
|
|
175
|
+
readonly vocabSize: number;
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
/**
|
|
179
|
+
* Kernel set descriptor. The actual kernels are inside the fused
|
|
180
|
+
* llama.cpp build; this is the metadata the runtime reads at startup
|
|
181
|
+
* (AGENTS.md §3 #5: "the runtime MUST log the kernel set on startup").
|
|
182
|
+
*/
|
|
183
|
+
export interface KernelSet extends RefCountedResource {
|
|
184
|
+
readonly kernels: ReadonlyArray<string>;
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
/** Scheduler graph slot. One per active engine, refcounted by surface. */
|
|
188
|
+
export interface SchedulerSlot extends RefCountedResource {
|
|
189
|
+
/** Surfaces (text/voice) currently holding a ref. */
|
|
190
|
+
surfaces(): ReadonlyArray<"text" | "voice">;
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
/** Native MTP draft state is shared between text-only and voice modes. */
|
|
194
|
+
export interface MtpDraftHandle extends RefCountedResource {
|
|
195
|
+
readonly modelId: string;
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
/**
 * Build the ref-counted handle for a native MTP draft model.
 *
 * The handle id is `mtp:<modelId>`. Its `release()` resolves without doing
 * anything, because MTP state lifetime is owned by the active native text
 * runtime.
 */
export function createMtpDraftHandle(args: {
	modelId: string;
}): MtpDraftHandle {
	const { modelId } = args;
	// Intentionally empty: the native text runtime owns the MTP state.
	const release = async (): Promise<void> => {};
	return { id: `mtp:${modelId}`, modelId, release };
}
|
|
209
|
+
|
|
210
|
+
interface RegistryEntry<T extends RefCountedResource> {
|
|
211
|
+
readonly resource: T;
|
|
212
|
+
refCount: number;
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
/**
|
|
216
|
+
* Owns the shared resources for one engine. Voice + text both `acquire`
|
|
217
|
+
* and `release` against the same registry; the registry only releases
|
|
218
|
+
* the underlying resource when refcount hits zero.
|
|
219
|
+
*
|
|
220
|
+
* Thread-safety: all methods run on the single Node event loop; no
|
|
221
|
+
* locks needed. Promises returned from `release()` MUST be awaited so
|
|
222
|
+
* the lifecycle state machine can observe completion.
|
|
223
|
+
*/
|
|
224
|
+
export class SharedResourceRegistry {
|
|
225
|
+
private readonly entries = new Map<
|
|
226
|
+
string,
|
|
227
|
+
RegistryEntry<RefCountedResource>
|
|
228
|
+
>();
|
|
229
|
+
private readonly log?: Logger;
|
|
230
|
+
|
|
231
|
+
/**
 * @param opts.logger Optional structural logger; when omitted the registry
 *   emits no log output.
 */
constructor(opts: { logger?: Logger } = {}) {
	const { logger } = opts;
	this.log = logger;
}
|
|
234
|
+
|
|
235
|
+
/**
 * Take one reference on `resource`, keyed by `resource.id`.
 *
 * An id seen for the first time is stored with a refcount of 1 and the
 * argument itself is returned. An id that is already tracked has its
 * refcount bumped and the originally registered instance is returned, so
 * callers MUST keep the returned value rather than the one they passed in
 * (deduplication by id).
 */
acquire<T extends RefCountedResource>(resource: T): T {
	const tracked = this.entries.get(resource.id);
	if (tracked === undefined) {
		this.entries.set(resource.id, { resource, refCount: 1 });
		return resource;
	}
	tracked.refCount += 1;
	return tracked.resource as T;
}
|
|
250
|
+
|
|
251
|
+
/**
 * Drop one reference on the resource registered under `id`.
 *
 * While other references remain this only decrements the count. When the
 * last reference goes, the entry is removed and the resource's own
 * `release()` is awaited, followed by a debug log line.
 *
 * @throws Error when `id` is not tracked — a silent no-op would hide leaks
 *   and double releases.
 */
async release(id: string): Promise<void> {
	const tracked = this.entries.get(id);
	if (tracked === undefined) {
		throw new Error(
			`[shared-resources] release(${id}): unknown resource — possible double release or registry desync`,
		);
	}

	tracked.refCount -= 1;
	const stillReferenced = tracked.refCount > 0;
	if (stillReferenced) return;

	// The entry is removed before the underlying release is awaited, so the
	// id is already untracked while that promise is pending.
	this.entries.delete(id);
	await tracked.resource.release();
	this.log?.debug?.(`[SharedResourceRegistry] released ${id}`);
}
|
|
268
|
+
|
|
269
|
+
/** Diagnostic: current refcount, or 0 when not present. */
|
|
270
|
+
refCount(id: string): number {
|
|
271
|
+
return this.entries.get(id)?.refCount ?? 0;
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
/** Diagnostic: snapshot of currently-tracked resource ids. */
|
|
275
|
+
ids(): ReadonlyArray<string> {
|
|
276
|
+
return Array.from(this.entries.keys());
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
/** Total tracked resources. */
|
|
280
|
+
size(): number {
|
|
281
|
+
return this.entries.size;
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
/**
|
|
285
|
+
* Currently-resident evictable model roles, ascending by eviction
|
|
286
|
+
* priority (cheapest-to-evict first). Used by `MemoryMonitor` to walk
|
|
287
|
+
* roles under RAM pressure. Non-resident roles are filtered out — there's
|
|
288
|
+
* nothing to reclaim.
|
|
289
|
+
*/
|
|
290
|
+
evictableRoles(): ReadonlyArray<EvictableModelRole> {
|
|
291
|
+
const out: EvictableModelRole[] = [];
|
|
292
|
+
for (const entry of this.entries.values()) {
|
|
293
|
+
if (isEvictableModelRole(entry.resource) && entry.resource.isResident()) {
|
|
294
|
+
out.push(entry.resource);
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
return out.sort((a, b) => a.evictionPriority - b.evictionPriority);
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
/**
|
|
301
|
+
* Evict the lowest-priority resident role and return its `id`, or `null`
|
|
302
|
+
* when nothing is evictable. Observable: emits an `info` log line so the
|
|
303
|
+
* eviction is visible in the dev console. The role re-loads lazily on
|
|
304
|
+
* next use — this only frees memory.
|
|
305
|
+
*/
|
|
306
|
+
async evictLowestPriorityRole(): Promise<{
|
|
307
|
+
id: string;
|
|
308
|
+
role: ResidentModelRole;
|
|
309
|
+
estimatedMb: number;
|
|
310
|
+
} | null> {
|
|
311
|
+
const [target] = this.evictableRoles();
|
|
312
|
+
if (!target) return null;
|
|
313
|
+
const estimatedMb = target.estimatedResidentMb();
|
|
314
|
+
await target.evict();
|
|
315
|
+
this.log?.info?.(
|
|
316
|
+
`[SharedResourceRegistry] evicted role ${target.role} (${target.id}); reclaimed ~${estimatedMb} MB`,
|
|
317
|
+
);
|
|
318
|
+
return { id: target.id, role: target.role, estimatedMb };
|
|
319
|
+
}
|
|
320
|
+
}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Speaker-ID + diarization attribution pipeline.
|
|
3
|
+
*
|
|
4
|
+
* Wraps a `StreamingTranscriber` so the partial / final
|
|
5
|
+
* `TranscriptUpdate`s carry diarized `VoiceSegment[]` and a
|
|
6
|
+
* `primarySpeaker`. The attribution runs in parallel with ASR — the
|
|
7
|
+
* encoder fires the moment ≥ 1 s of audio is available, and the
|
|
8
|
+
* profile store's `beginMatch` starts at speech-start.
|
|
9
|
+
*
|
|
10
|
+
* This module owns *only* the attribution logic. It does NOT replace
|
|
11
|
+
* the transcriber; callers feed PCM through both the transcriber and
|
|
12
|
+
* the attributor in parallel, then attach the resolved metadata via
|
|
13
|
+
* `BaseStreamingTranscriber.setMetadataDefaults()` once it lands.
|
|
14
|
+
*
|
|
15
|
+
* Why a separate module: the existing `VoicePipeline` is large and
|
|
16
|
+
* already handles a lot. Putting attribution behind a small adapter
|
|
17
|
+
* lets the voice pipeline opt in without entangling the diarizer /
|
|
18
|
+
* encoder / profile-store dependencies into the streaming-ASR contract.
|
|
19
|
+
*/
|
|
20
|
+
import type { VoiceProfileObservation, VoiceProfileStore } from "../profile-store";
|
|
21
|
+
import type { VoiceInputSource, VoiceSegment, VoiceSpeaker, VoiceTurnMetadata } from "../types";
|
|
22
|
+
import type { Diarizer } from "./diarizer";
|
|
23
|
+
import type { SpeakerEncoder } from "./encoder";
|
|
24
|
+
/** Collaborators handed to the `VoiceAttributionPipeline` constructor. */
export interface VoiceAttributionPipelineDeps {
	/** Speaker encoder; required. */
	encoder: SpeakerEncoder;
	/** Voice profile store; required. */
	profileStore: VoiceProfileStore;
	/** Diarizer; optional. */
	diarizer?: Diarizer;
}
|
|
29
|
+
/** Argument accepted by `VoiceAttributionPipeline.attribute()`. */
export interface VoiceAttributionRequest {
	/** Identifier of the turn being attributed. */
	turnId: string;
	/** Concatenated mono 16 kHz PCM covering the whole turn. */
	pcm: Float32Array;
	/** Optional input source of the audio. */
	source?: VoiceInputSource;
	/** Optional turn start, in milliseconds. */
	startedAtMs?: number;
	/** Optional turn end, in milliseconds. */
	endedAtMs?: number;
	/** When provided, the attributor only runs if this signal has not yet fired. */
	signal?: AbortSignal;
}
|
|
39
|
+
/** Value resolved by `VoiceAttributionPipeline.attribute()`. */
export interface VoiceAttributionOutput {
	/** Identifier of the attributed turn. */
	turnId: string;
	/** Voice segments for the turn. */
	segments: VoiceSegment[];
	/** Turn-level metadata. */
	turn: VoiceTurnMetadata;
	/** Profile-store observation, or `null` when there is none. */
	observation: VoiceProfileObservation | null;
	/** Primary speaker of the turn; may be absent. */
	primarySpeaker?: VoiceSpeaker;
}
|
|
46
|
+
/**
 * Runs the diarizer, encoder and profile store over the audio of one
 * complete turn. Slicing the audio buffer is the caller's job (the
 * pipeline's prefix queue already buffers the whole utterance for the
 * streaming-ASR path).
 *
 * Flow, at a high level:
 *   1. The diarizer processes the full PCM and yields per-segment
 *      speaker tags (window-local ids).
 *   2. The longest local-speaker span is selected and passed to the
 *      encoder (≥ 1 s) to obtain a 256-dim embedding.
 *   3. That embedding is matched against the profile store. A hit
 *      attributes the turn to the matched profile's entity; a miss
 *      creates a new cluster profile (no entity binding — that happens
 *      at the LifeOps layer based on utterance text).
 *   4. `VoiceSegment[]` is built with the resolved speaker, alongside a
 *      `VoiceTurnMetadata` for downstream consumers.
 */
export declare class VoiceAttributionPipeline {
	private readonly deps;
	constructor(deps: VoiceAttributionPipelineDeps);
	/** Attribute one turn; resolves with the attribution output for `req`. */
	attribute(req: VoiceAttributionRequest): Promise<VoiceAttributionOutput>;
	private buildEmptyOutput;
	private spliceSpans;
	private spanMsTotal;
	private localToVoiceSegments;
	private localToUnknownSegments;
}
|
|
74
|
+
//# sourceMappingURL=attribution-pipeline.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"attribution-pipeline.d.ts","sourceRoot":"","sources":["attribution-pipeline.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,KAAK,EACX,uBAAuB,EACvB,iBAAiB,EACjB,MAAM,kBAAkB,CAAC;AAE1B,OAAO,KAAK,EACX,gBAAgB,EAChB,YAAY,EACZ,YAAY,EACZ,iBAAiB,EACjB,MAAM,UAAU,CAAC;AAClB,OAAO,KAAK,EAAE,QAAQ,EAAuB,MAAM,YAAY,CAAC;AAChE,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,WAAW,CAAC;AAGhD,MAAM,WAAW,4BAA4B;IAC5C,OAAO,EAAE,cAAc,CAAC;IACxB,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,YAAY,EAAE,iBAAiB,CAAC;CAChC;AAED,MAAM,WAAW,uBAAuB;IACvC,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,gBAAgB,CAAC;IAC1B,wDAAwD;IACxD,GAAG,EAAE,YAAY,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kFAAkF;IAClF,MAAM,CAAC,EAAE,WAAW,CAAC;CACrB;AAED,MAAM,WAAW,sBAAsB;IACtC,MAAM,EAAE,MAAM,CAAC;IACf,cAAc,CAAC,EAAE,YAAY,CAAC;IAC9B,QAAQ,EAAE,YAAY,EAAE,CAAC;IACzB,IAAI,EAAE,iBAAiB,CAAC;IACxB,WAAW,EAAE,uBAAuB,GAAG,IAAI,CAAC;CAC5C;AAgCD;;;;;;;;;;;;;;;;;GAiBG;AACH,qBAAa,wBAAwB;IACxB,OAAO,CAAC,QAAQ,CAAC,IAAI;gBAAJ,IAAI,EAAE,4BAA4B;IAEzD,SAAS,CACd,GAAG,EAAE,uBAAuB,GAC1B,OAAO,CAAC,sBAAsB,CAAC;IA2KlC,OAAO,CAAC,gBAAgB;IAexB,OAAO,CAAC,WAAW;IA0BnB,OAAO,CAAC,WAAW;IAMnB,OAAO,CAAC,oBAAoB;IAwC5B,OAAO,CAAC,sBAAsB;CAc9B"}
|