@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +81 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +7 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +54 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1171 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +190 -0
- package/src/routes/local-inference-asr-route.ts +213 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +423 -0
- package/src/routes/local-inference-compat-routes.ts +782 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1398 -0
- package/src/runtime/index.d.ts +14 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +27 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/asr/errors.d.ts +21 -0
- package/src/services/asr/errors.d.ts.map +1 -0
- package/src/services/asr/errors.ts +50 -0
- package/src/services/asr/hash.d.ts +28 -0
- package/src/services/asr/hash.d.ts.map +1 -0
- package/src/services/asr/hash.ts +49 -0
- package/src/services/asr/index.d.ts +76 -0
- package/src/services/asr/index.d.ts.map +1 -0
- package/src/services/asr/index.ts +178 -0
- package/src/services/asr/types.d.ts +91 -0
- package/src/services/asr/types.d.ts.map +1 -0
- package/src/services/asr/types.ts +95 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +240 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +724 -0
- package/src/services/downloader.ts +899 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +534 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1891 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +183 -0
- package/src/services/hardware.ts +404 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +281 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +30 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +225 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +693 -0
- package/src/services/manifest/schema.d.ts +715 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +655 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +569 -0
- package/src/services/memory-arbiter.d.ts +343 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +1000 -0
- package/src/services/memory-monitor.d.ts +119 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +296 -0
- package/src/services/memory-pressure.d.ts +127 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +413 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +672 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +376 -0
- package/src/services/routing-policy.d.ts +55 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.ts +228 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +15 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/tts/errors.ts +46 -0
- package/src/services/tts/index.ts +214 -0
- package/src/services/tts/tts-audio-cache.ts +235 -0
- package/src/services/tts/types.ts +157 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +148 -0
- package/src/services/voice/embedding.ts +244 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +746 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2226 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +636 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +671 -0
- package/src/services/voice/ffi-bindings.ts +3050 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.ts +105 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +420 -0
- package/src/services/voice/voice-budget.ts +656 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Typed unavailability error for ASR backends. Mirrors
|
|
3
|
+
* `ImageGenBackendUnavailableError` from `../imagegen/errors`.
|
|
4
|
+
*
|
|
5
|
+
* Backends throw this to signal that a specific request cannot be served
|
|
6
|
+
* (`unsupported_request`, `model_missing`, `load_failed`, ...) so the
|
|
7
|
+
* arbiter / caller can surface an actionable message rather than a generic
|
|
8
|
+
* "transcription failed".
|
|
9
|
+
*/
|
|
10
|
+
export type AsrUnavailableReason = "unsupported_request" | "model_missing" | "load_failed" | "decode_failed" | "aborted";
|
|
11
|
+
export declare class AsrBackendUnavailableError extends Error {
|
|
12
|
+
readonly backendId: string;
|
|
13
|
+
readonly reason: AsrUnavailableReason;
|
|
14
|
+
readonly code = "ASR_BACKEND_UNAVAILABLE";
|
|
15
|
+
constructor(backendId: string, reason: AsrUnavailableReason, message: string, options?: {
|
|
16
|
+
cause?: unknown;
|
|
17
|
+
});
|
|
18
|
+
toJSON(): Record<string, string>;
|
|
19
|
+
}
|
|
20
|
+
export declare function isAsrBackendUnavailable(err: unknown): err is AsrBackendUnavailableError;
|
|
21
|
+
//# sourceMappingURL=errors.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"errors.d.ts","sourceRoot":"","sources":["errors.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,MAAM,MAAM,oBAAoB,GAC7B,qBAAqB,GACrB,eAAe,GACf,aAAa,GACb,eAAe,GACf,SAAS,CAAC;AAEb,qBAAa,0BAA2B,SAAQ,KAAK;IAInD,QAAQ,CAAC,SAAS,EAAE,MAAM;IAC1B,QAAQ,CAAC,MAAM,EAAE,oBAAoB;IAJtC,QAAQ,CAAC,IAAI,6BAA6B;gBAGhC,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,oBAAoB,EACrC,OAAO,EAAE,MAAM,EACf,OAAO,CAAC,EAAE;QAAE,KAAK,CAAC,EAAE,OAAO,CAAA;KAAE;IAM9B,MAAM,IAAI,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC;CAQhC;AAED,wBAAgB,uBAAuB,CACtC,GAAG,EAAE,OAAO,GACV,GAAG,IAAI,0BAA0B,CAOnC"}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Typed unavailability error for ASR backends. Mirrors
|
|
3
|
+
* `ImageGenBackendUnavailableError` from `../imagegen/errors`.
|
|
4
|
+
*
|
|
5
|
+
* Backends throw this to signal that a specific request cannot be served
|
|
6
|
+
* (`unsupported_request`, `model_missing`, `load_failed`, ...) so the
|
|
7
|
+
* arbiter / caller can surface an actionable message rather than a generic
|
|
8
|
+
* "transcription failed".
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
/** Machine-readable reason an ASR backend could not serve a request. */
export type AsrUnavailableReason =
	| "unsupported_request"
	| "model_missing"
	| "load_failed"
	| "decode_failed"
	| "aborted";

/**
 * Error thrown when an ASR backend cannot serve a specific request.
 *
 * Carries the backend id and a typed {@link AsrUnavailableReason} so callers
 * can surface an actionable message instead of a generic failure. The fixed
 * `code` lets the error be recognised by value as well as by `instanceof`.
 */
export class AsrBackendUnavailableError extends Error {
	/** Fixed discriminator; always `"ASR_BACKEND_UNAVAILABLE"`. */
	readonly code = "ASR_BACKEND_UNAVAILABLE";

	/**
	 * @param backendId - Identifier of the backend that rejected the request.
	 * @param reason - Typed reason the request could not be served.
	 * @param message - Human-readable description.
	 * @param options - Optional bag; `cause` is attached as the error's `cause`.
	 */
	constructor(
		readonly backendId: string,
		readonly reason: AsrUnavailableReason,
		message: string,
		options?: { cause?: unknown },
	) {
		super(message);
		this.name = "AsrBackendUnavailableError";
		// Install `cause` explicitly instead of relying on the two-argument
		// `Error` constructor: engines that predate ES2022 error causes ignore
		// the options bag and would silently drop it. The descriptor matches
		// what the native constructor creates (own, writable, non-enumerable).
		if (typeof options === "object" && options !== null && "cause" in options) {
			Object.defineProperty(this, "cause", {
				value: options.cause,
				writable: true,
				enumerable: false,
				configurable: true,
			});
		}
	}

	/**
	 * Plain-object form containing only the string fields
	 * (`code`, `backendId`, `reason`, `message`); the cause is not included.
	 */
	toJSON(): Record<string, string> {
		return {
			code: this.code,
			backendId: this.backendId,
			reason: this.reason,
			message: this.message,
		};
	}
}
|
|
40
|
+
|
|
41
|
+
/**
 * Type guard for {@link AsrBackendUnavailableError}.
 *
 * Accepts real instances and also any non-null object whose `code` property
 * equals `"ASR_BACKEND_UNAVAILABLE"`.
 */
export function isAsrBackendUnavailable(
	err: unknown,
): err is AsrBackendUnavailableError {
	if (err instanceof AsrBackendUnavailableError) {
		return true;
	}
	if (typeof err !== "object" || err === null) {
		return false;
	}
	const { code } = err as { code?: unknown };
	return code === "ASR_BACKEND_UNAVAILABLE";
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Content-hash keys for the ASR transcript cache.
|
|
3
|
+
*
|
|
4
|
+
* The arbiter caches `AsrResult.text` by the SHA-256 of the
|
|
5
|
+
* raw fp32 PCM bytes prefixed with the sample rate (no resampling). The hash is fast (one pass over the
|
|
6
|
+
* fp32 buffer + a 4-byte sample-rate prefix) and stable across runs, so a
|
|
7
|
+
* caller who re-issues the same WAV gets the cached transcript without
|
|
8
|
+
* re-running the backend.
|
|
9
|
+
*
|
|
10
|
+
* We hash the underlying byte view of the Float32Array, not a re-quantized
|
|
11
|
+
* representation, so caller-supplied PCM that came from a 16-bit WAV
|
|
12
|
+
* decode (the common case from the `runtime.useModel(TRANSCRIPTION, ...)`
|
|
13
|
+
* path) deduplicates correctly with caller-supplied PCM that came from a
|
|
14
|
+
* fp32 mic capture.
|
|
15
|
+
*/
|
|
16
|
+
import type { AsrRequest } from "./types";
|
|
17
|
+
/**
|
|
18
|
+
* Hash a PCM buffer + sample rate into a stable cache key. The output is
|
|
19
|
+
* `<modelFamily>::sha256(<sampleRateLE><pcmBytes>[lang:<language>])` — the `lang:` suffix is hashed only when `req.language` is set.
|
|
20
|
+
*
|
|
21
|
+
* - `sampleRateLE`: 4-byte little-endian uint32 prefix so 16 kHz / 24 kHz
|
|
22
|
+
* inputs of the same float samples produce different keys.
|
|
23
|
+
* - `pcmBytes`: the raw fp32 view (`pcm.buffer.slice(pcm.byteOffset, pcm.byteOffset + pcm.byteLength)`).
|
|
24
|
+
*
|
|
25
|
+
* Returns the hex digest namespaced by family.
|
|
26
|
+
*/
|
|
27
|
+
export declare function hashAsrInput(req: AsrRequest, family?: string): string;
|
|
28
|
+
//# sourceMappingURL=hash.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"hash.d.ts","sourceRoot":"","sources":["hash.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAIH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AAK1C;;;;;;;;;GASG;AACH,wBAAgB,YAAY,CAAC,GAAG,EAAE,UAAU,EAAE,MAAM,CAAC,EAAE,MAAM,GAAG,MAAM,CAerE"}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Content-hash keys for the ASR transcript cache.
|
|
3
|
+
*
|
|
4
|
+
* The arbiter caches `AsrResult.text` by the SHA-256 of the
|
|
5
|
+
* raw fp32 PCM bytes prefixed with the sample rate (no resampling). The hash is fast (one pass over the
|
|
6
|
+
* fp32 buffer + a 4-byte sample-rate prefix) and stable across runs, so a
|
|
7
|
+
* caller who re-issues the same WAV gets the cached transcript without
|
|
8
|
+
* re-running the backend.
|
|
9
|
+
*
|
|
10
|
+
* We hash the underlying byte view of the Float32Array, not a re-quantized
|
|
11
|
+
* representation, so caller-supplied PCM that came from a 16-bit WAV
|
|
12
|
+
* decode (the common case from the `runtime.useModel(TRANSCRIPTION, ...)`
|
|
13
|
+
* path) deduplicates correctly with caller-supplied PCM that came from a
|
|
14
|
+
* fp32 mic capture.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { createHash } from "node:crypto";
|
|
18
|
+
|
|
19
|
+
import type { AsrRequest } from "./types";
|
|
20
|
+
|
|
21
|
+
/** Namespace used when neither the request nor the caller names a model family. */
const HASH_FAMILY_DEFAULT = "qwen3-asr";

/**
 * Derive the transcript-cache key for an ASR request.
 *
 * The SHA-256 digest covers, in order:
 *
 *   1. the sample rate as a 4-byte little-endian uint32 (floored, clamped at 0),
 *      so identical samples at different rates yield different keys;
 *   2. the raw fp32 bytes of `req.pcm`, read through a view (no copy);
 *   3. `lang:<language>` when `req.language` is set.
 *
 * @param req - Request whose PCM, sample rate and language hint are hashed.
 * @param family - Fallback namespace; `req.modelFamily` takes precedence, and
 *   `qwen3-asr` is used when neither is given.
 * @returns `<family>::<hex digest>`.
 */
export function hashAsrInput(req: AsrRequest, family?: string): string {
	const ratePrefix = new Uint8Array(4);
	const flooredRate = Math.max(0, Math.floor(req.sampleRateHz));
	new DataView(ratePrefix.buffer).setUint32(0, flooredRate, true);

	const samples = req.pcm;
	const digest = createHash("sha256")
		.update(ratePrefix)
		.update(
			new Uint8Array(samples.buffer, samples.byteOffset, samples.byteLength),
		);
	if (req.language) {
		digest.update(`lang:${req.language}`);
	}

	const namespace = req.modelFamily ?? family ?? HASH_FAMILY_DEFAULT;
	return `${namespace}::${digest.digest("hex")}`;
}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local ASR (transcription) capability — public entry point.
|
|
3
|
+
*
|
|
4
|
+
* This module is what `provider.ts` (`createTranscriptionHandler`), the
|
|
5
|
+
* voice pipeline bridge, and additional ASR consumers import to register the
|
|
6
|
+
* capability with the WS1 MemoryArbiter.
|
|
7
|
+
*
|
|
8
|
+
* Wiring:
|
|
9
|
+
*
|
|
10
|
+
* const arbiter = service.getMemoryArbiter();
|
|
11
|
+
* const registration = createAsrCapabilityRegistration({
|
|
12
|
+
* loader: createDefaultAsrLoader({ ... }),
|
|
13
|
+
* transcriptCache: arbiter,
|
|
14
|
+
* });
|
|
15
|
+
* arbiter.registerCapability(registration);
|
|
16
|
+
*
|
|
17
|
+
* `createAsrCapabilityRegistration` wraps the underlying backend so the
|
|
18
|
+
* arbiter's `run(request)` path:
|
|
19
|
+
*
|
|
20
|
+
* 1. Calls `backend.supports(request)`. If false, the arbiter throws
|
|
21
|
+
* `AsrBackendUnavailableError` with `unsupported_request`.
|
|
22
|
+
* 2. Hashes the request's PCM bytes (sample-rate-normalized) and looks
|
|
23
|
+
* it up in the optional `transcriptCache`. On a hit, the backend is
|
|
24
|
+
* skipped entirely and the cached text is returned.
|
|
25
|
+
* 3. On a miss, calls `backend.transcribe(request)`, stores the
|
|
26
|
+
* transcript under the same hash, and returns the result.
|
|
27
|
+
*
|
|
28
|
+
* The capability registers with `residentRole: "asr"` — distinct from the
|
|
29
|
+
* `vision` slot that `vision-describe` and `image-gen` co-evict in.
|
|
30
|
+
* Qwen-ASR cohabits comfortably with text on most desktops (≈600 MB for
|
|
31
|
+
* Q4_K_M Qwen3-ASR-0.6B), but on a 6 GB iPhone / 8 GB low-tier Android
|
|
32
|
+
* the arbiter's resident-role priority still lets a text-large request
|
|
33
|
+
* evict the ASR handle when memory is tight. See `memory-arbiter.ts`
|
|
34
|
+
* `RESIDENT_ROLE_PRIORITY`.
|
|
35
|
+
*/
|
|
36
|
+
export { AsrBackendUnavailableError, type AsrUnavailableReason, isAsrBackendUnavailable, } from "./errors";
|
|
37
|
+
export { hashAsrInput } from "./hash";
|
|
38
|
+
export type { AsrBackend, AsrBackendLoader, AsrLoadArgs, AsrRequest, AsrResult, } from "./types";
|
|
39
|
+
import type { CapabilityRegistration } from "../memory-arbiter";
|
|
40
|
+
import type { AsrBackend, AsrBackendLoader, AsrRequest, AsrResult } from "./types";
|
|
41
|
+
/**
|
|
42
|
+
* Minimal cache shape the registration accepts. Lets tests inject a fake
|
|
43
|
+
* cache without pulling in the whole MemoryArbiter. The arbiter
|
|
44
|
+
* implements this surface (see `memory-arbiter.ts` `getCachedAsrTranscript`
|
|
45
|
+
* / `setCachedAsrTranscript`).
|
|
46
|
+
*/
|
|
47
|
+
export interface AsrTranscriptCacheLike {
|
|
48
|
+
getCachedAsrTranscript(hash: string): {
|
|
49
|
+
text: string;
|
|
50
|
+
live?: boolean;
|
|
51
|
+
} | null;
|
|
52
|
+
setCachedAsrTranscript(hash: string, entry: {
|
|
53
|
+
text: string;
|
|
54
|
+
}, ttlMs?: number): void;
|
|
55
|
+
}
|
|
56
|
+
export interface CreateAsrCapabilityRegistrationOptions {
|
|
57
|
+
loader: AsrBackendLoader;
|
|
58
|
+
/** Optional content-hash cache. When provided, identical PCM inputs skip the backend. */
|
|
59
|
+
transcriptCache?: AsrTranscriptCacheLike;
|
|
60
|
+
/** Default model family for the cache key. Defaults to `qwen3-asr`. */
|
|
61
|
+
modelFamily?: string;
|
|
62
|
+
/**
|
|
63
|
+
* Best-effort RAM footprint estimate for the loaded weights. The
|
|
64
|
+
* arbiter only uses this for telemetry; eviction is by priority. The
|
|
65
|
+
* default (600 MB) matches Q4_K_M Qwen3-ASR-0.6B; larger ASR models
|
|
66
|
+
* SHOULD pass a higher estimate.
|
|
67
|
+
*/
|
|
68
|
+
estimatedMb?: number;
|
|
69
|
+
}
|
|
70
|
+
/**
|
|
71
|
+
* Build a `CapabilityRegistration` ready to feed to
|
|
72
|
+
* `arbiter.registerCapability()`. Mirrors `createVisionCapabilityRegistration`
|
|
73
|
+
* (WS2) and `createImageGenCapabilityRegistration` (WS3).
|
|
74
|
+
*/
|
|
75
|
+
export declare function createAsrCapabilityRegistration(opts: CreateAsrCapabilityRegistrationOptions): CapabilityRegistration<AsrBackend, AsrRequest, AsrResult>;
|
|
76
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;AAEH,OAAO,EACN,0BAA0B,EAC1B,KAAK,oBAAoB,EACzB,uBAAuB,GACvB,MAAM,UAAU,CAAC;AAClB,OAAO,EAAE,YAAY,EAAE,MAAM,QAAQ,CAAC;AACtC,YAAY,EACX,UAAU,EACV,gBAAgB,EAChB,WAAW,EACX,UAAU,EACV,SAAS,GACT,MAAM,SAAS,CAAC;AAEjB,OAAO,KAAK,EAEX,sBAAsB,EACtB,MAAM,mBAAmB,CAAC;AAG3B,OAAO,KAAK,EACX,UAAU,EACV,gBAAgB,EAChB,UAAU,EACV,SAAS,EACT,MAAM,SAAS,CAAC;AAEjB;;;;;GAKG;AACH,MAAM,WAAW,sBAAsB;IACtC,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,CAAC,EAAE,OAAO,CAAA;KAAE,GAAG,IAAI,CAAC;IAC9E,sBAAsB,CACrB,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,EACvB,KAAK,CAAC,EAAE,MAAM,GACZ,IAAI,CAAC;CACR;AAED,MAAM,WAAW,sCAAsC;IACtD,MAAM,EAAE,gBAAgB,CAAC;IACzB,yFAAyF;IACzF,eAAe,CAAC,EAAE,sBAAsB,CAAC;IACzC,uEAAuE;IACvE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;;;GAIG;AACH,wBAAgB,+BAA+B,CAC9C,IAAI,EAAE,sCAAsC,GAC1C,sBAAsB,CAAC,UAAU,EAAE,UAAU,EAAE,SAAS,CAAC,CA6E3D"}
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local ASR (transcription) capability — public entry point.
|
|
3
|
+
*
|
|
4
|
+
* This module is what `provider.ts` (`createTranscriptionHandler`), the
|
|
5
|
+
* voice pipeline bridge, and additional ASR consumers import to register the
|
|
6
|
+
* capability with the WS1 MemoryArbiter.
|
|
7
|
+
*
|
|
8
|
+
* Wiring:
|
|
9
|
+
*
|
|
10
|
+
* const arbiter = service.getMemoryArbiter();
|
|
11
|
+
* const registration = createAsrCapabilityRegistration({
|
|
12
|
+
* loader: createDefaultAsrLoader({ ... }),
|
|
13
|
+
* transcriptCache: arbiter,
|
|
14
|
+
* });
|
|
15
|
+
* arbiter.registerCapability(registration);
|
|
16
|
+
*
|
|
17
|
+
* `createAsrCapabilityRegistration` wraps the underlying backend so the
|
|
18
|
+
* arbiter's `run(request)` path:
|
|
19
|
+
*
|
|
20
|
+
* 1. Calls `backend.supports(request)`. If false, the arbiter throws
|
|
21
|
+
* `AsrBackendUnavailableError` with `unsupported_request`.
|
|
22
|
+
* 2. Hashes the request's PCM bytes (sample-rate-normalized) and looks
|
|
23
|
+
* it up in the optional `transcriptCache`. On a hit, the backend is
|
|
24
|
+
* skipped entirely and the cached text is returned.
|
|
25
|
+
* 3. On a miss, calls `backend.transcribe(request)`, stores the
|
|
26
|
+
* transcript under the same hash, and returns the result.
|
|
27
|
+
*
|
|
28
|
+
* The capability registers with `residentRole: "asr"` — distinct from the
|
|
29
|
+
* `vision` slot that `vision-describe` and `image-gen` co-evict in.
|
|
30
|
+
* Qwen-ASR cohabits comfortably with text on most desktops (≈600 MB for
|
|
31
|
+
* Q4_K_M Qwen3-ASR-0.6B), but on a 6 GB iPhone / 8 GB low-tier Android
|
|
32
|
+
* the arbiter's resident-role priority still lets a text-large request
|
|
33
|
+
* evict the ASR handle when memory is tight. See `memory-arbiter.ts`
|
|
34
|
+
* `RESIDENT_ROLE_PRIORITY`.
|
|
35
|
+
*/
|
|
36
|
+
|
|
37
|
+
export {
|
|
38
|
+
AsrBackendUnavailableError,
|
|
39
|
+
type AsrUnavailableReason,
|
|
40
|
+
isAsrBackendUnavailable,
|
|
41
|
+
} from "./errors";
|
|
42
|
+
export { hashAsrInput } from "./hash";
|
|
43
|
+
export type {
|
|
44
|
+
AsrBackend,
|
|
45
|
+
AsrBackendLoader,
|
|
46
|
+
AsrLoadArgs,
|
|
47
|
+
AsrRequest,
|
|
48
|
+
AsrResult,
|
|
49
|
+
} from "./types";
|
|
50
|
+
|
|
51
|
+
import type {
|
|
52
|
+
ArbiterCapability,
|
|
53
|
+
CapabilityRegistration,
|
|
54
|
+
} from "../memory-arbiter";
|
|
55
|
+
import { AsrBackendUnavailableError } from "./errors";
|
|
56
|
+
import { hashAsrInput } from "./hash";
|
|
57
|
+
import type {
|
|
58
|
+
AsrBackend,
|
|
59
|
+
AsrBackendLoader,
|
|
60
|
+
AsrRequest,
|
|
61
|
+
AsrResult,
|
|
62
|
+
} from "./types";
|
|
63
|
+
|
|
64
|
+
/**
 * Smallest cache surface `createAsrCapabilityRegistration` needs.
 *
 * Declared structurally so tests can pass a fake cache rather than a full
 * MemoryArbiter; the arbiter exposes the same two methods
 * (`getCachedAsrTranscript` / `setCachedAsrTranscript` in `memory-arbiter.ts`).
 */
export interface AsrTranscriptCacheLike {
	/**
	 * Look up a transcript by content hash. Returns `null` when absent.
	 * An entry with `live === false` is treated as a miss by the registration.
	 */
	getCachedAsrTranscript(
		hash: string,
	): { text: string; live?: boolean } | null;

	/** Store a transcript under `hash`, optionally with a time-to-live in ms. */
	setCachedAsrTranscript(
		hash: string,
		entry: { text: string },
		ttlMs?: number,
	): void;
}
|
|
78
|
+
|
|
79
|
+
/** Options accepted by `createAsrCapabilityRegistration`. */
export interface CreateAsrCapabilityRegistrationOptions {
	/** Loader that produces the backend for a given model key. */
	loader: AsrBackendLoader;

	/**
	 * Optional content-hash cache. When supplied, a request whose hash is
	 * already cached is answered without calling the backend.
	 */
	transcriptCache?: AsrTranscriptCacheLike;

	/** Model family used to namespace cache keys. Defaults to `qwen3-asr`. */
	modelFamily?: string;

	/**
	 * Best-effort estimate, in MB, of RAM held by the loaded weights. Used by
	 * the arbiter for telemetry only; eviction is by priority. Defaults to
	 * 600 MB (Q4_K_M Qwen3-ASR-0.6B); larger ASR models SHOULD pass a higher
	 * value.
	 */
	estimatedMb?: number;
}
|
|
93
|
+
|
|
94
|
+
/**
|
|
95
|
+
* Build a `CapabilityRegistration` ready to feed to
|
|
96
|
+
* `arbiter.registerCapability()`. Mirrors `createVisionCapabilityRegistration`
|
|
97
|
+
* (WS2) and `createImageGenCapabilityRegistration` (WS3).
|
|
98
|
+
*/
|
|
99
|
+
/**
 * Assemble the `transcribe` capability registration for the memory arbiter.
 *
 * The returned `run` handler validates the request, consults the optional
 * transcript cache by content hash, and only invokes the backend on a miss.
 * Cache failures (hashing, read, or write) never fail the request.
 *
 * @param opts - Loader plus optional cache, cache-key family and RAM estimate.
 * @returns A registration with `residentRole: "asr"`.
 */
export function createAsrCapabilityRegistration(
	opts: CreateAsrCapabilityRegistrationOptions,
): CapabilityRegistration<AsrBackend, AsrRequest, AsrResult> {
	const capability: ArbiterCapability = "transcribe";
	const { loader, transcriptCache } = opts;
	const cacheFamily = opts.modelFamily ?? "qwen3-asr";

	/** Build the typed rejection used by every request-validation failure. */
	const unsupported = (
		backend: AsrBackend,
		message: string,
	): AsrBackendUnavailableError =>
		new AsrBackendUnavailableError(backend.id, "unsupported_request", message);

	/** Throw the signal's reason (or a generic AbortError) once it has fired. */
	const throwIfAborted = (signal: AbortSignal | undefined): void => {
		if (!signal?.aborted) {
			return;
		}
		throw signal.reason instanceof Error
			? signal.reason
			: new DOMException("Aborted", "AbortError");
	};

	return {
		capability,
		// ASR occupies its own resident-role slot, separate from the slot
		// shared by vision and image-gen; see RESIDENT_ROLE_PRIORITY in
		// `memory-arbiter.ts` for which roles may evict it.
		residentRole: "asr",
		estimatedMb: opts.estimatedMb ?? 600,

		async load(modelKey: string): Promise<AsrBackend> {
			return await loader(modelKey);
		},

		async unload(backend: AsrBackend): Promise<void> {
			await backend.dispose();
		},

		async run(backend: AsrBackend, request: AsrRequest): Promise<AsrResult> {
			// --- Request validation -------------------------------------------
			const hasSamples =
				request.pcm instanceof Float32Array && request.pcm.length !== 0;
			if (!hasSamples) {
				throw unsupported(
					backend,
					`[asr] backend "${backend.id}" requires non-empty Float32Array pcm`,
				);
			}
			const rateIsValid =
				Number.isFinite(request.sampleRateHz) && request.sampleRateHz > 0;
			if (!rateIsValid) {
				throw unsupported(
					backend,
					`[asr] backend "${backend.id}" requires a positive sampleRateHz; got ${request.sampleRateHz}`,
				);
			}
			if (!backend.supports(request)) {
				throw unsupported(
					backend,
					`[asr] backend "${backend.id}" does not support this request (sampleRateHz=${request.sampleRateHz} pcmSamples=${request.pcm.length})`,
				);
			}

			// --- Cache lookup --------------------------------------------------
			// Bail out before doing any hashing if the caller already aborted.
			throwIfAborted(request.signal);
			let cacheKey: string | null = null;
			if (transcriptCache) {
				try {
					cacheKey = hashAsrInput(request, cacheFamily);
					const cached = transcriptCache.getCachedAsrTranscript(cacheKey);
					if (cached && cached.live !== false) {
						return { text: cached.text, cacheHit: true };
					}
				} catch {
					// Hashing or the cache read failed. Drop the key so no
					// write is attempted later, and fall through to the backend.
					cacheKey = null;
				}
			}

			// --- Backend run ---------------------------------------------------
			const result = await backend.transcribe(request);
			// An abort that landed during transcription discards the result.
			throwIfAborted(request.signal);

			if (transcriptCache && cacheKey) {
				try {
					transcriptCache.setCachedAsrTranscript(cacheKey, {
						text: result.text,
					});
				} catch {
					// Caching is best-effort; never let a cache write fail a request.
				}
			}
			return { ...result, cacheHit: false };
		},
	};
}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local ASR (transcription) types — mirrors the WS2 vision-describe and
|
|
3
|
+
* WS3 image-gen splits.
|
|
4
|
+
*
|
|
5
|
+
* Two layers live here:
|
|
6
|
+
*
|
|
7
|
+
* 1. The **request/result** contract every backend implements
|
|
8
|
+
* (`AsrRequest`, `AsrResult`). Callers pass mono 16 kHz fp32 PCM (or
|
|
9
|
+
* a sample rate the backend can resample from), backends return a
|
|
10
|
+
* transcript string plus optional token / segment metadata.
|
|
11
|
+
*
|
|
12
|
+
* 2. The **backend** interface (`AsrBackend`) the `MemoryArbiter`
|
|
13
|
+
* (WS1) registers as a capability handler. One backend per
|
|
14
|
+
* per-platform path:
|
|
15
|
+
*
|
|
16
|
+
* - `fused` — `eliza_inference_asr_*` ABI (Qwen3-ASR
|
|
17
|
+
* inside the fused libelizainference). The sole on-device ASR
|
|
18
|
+
* runtime: Linux / macOS / Windows desktop, AOSP system app via
|
|
19
|
+
* the same fused build.
|
|
20
|
+
* - `coreml` — Capacitor bridge to a Core ML ASR
|
|
21
|
+
* model on iOS (unavailable until the bridge ships).
|
|
22
|
+
* - `aosp-ffi` — bun:ffi shim around the AOSP NDK
|
|
23
|
+
* Qwen-ASR JNI handle (unavailable until the AOSP fused
|
|
24
|
+
* ASR symbols are exported).
|
|
25
|
+
* - `fake` — deterministic in-process backend used by tests.
|
|
26
|
+
*
|
|
27
|
+
* Cache contract:
|
|
28
|
+
*
|
|
29
|
+
* The arbiter caches transcript text by **content hash** of the input PCM
|
|
30
|
+
* (sample-rate-normalized to 16 kHz mono). The cache namespace is
|
|
31
|
+
* `asr-transcripts` — distinct from the WS2 vision-embedding cache
|
|
32
|
+
* (`vision-projector-tokens`) and the WS3 image-gen request key space.
|
|
33
|
+
* Re-transcribing the same audio is a fast text return; the backend
|
|
34
|
+
* never re-runs.
|
|
35
|
+
*/
|
|
36
|
+
/** A request the ASR capability handler accepts. */
|
|
37
|
+
export interface AsrRequest {
|
|
38
|
+
/**
|
|
39
|
+
* Mono 32-bit float PCM samples. Range [-1, 1]. The backend will resample
|
|
40
|
+
* to 16 kHz internally if `sampleRateHz` is not 16000.
|
|
41
|
+
*/
|
|
42
|
+
pcm: Float32Array;
|
|
43
|
+
/** Sample rate of `pcm`, in Hz. Common values: 16000, 24000, 48000. */
|
|
44
|
+
sampleRateHz: number;
|
|
45
|
+
/** Optional caller-supplied abort signal. Backends MUST honour it. */
|
|
46
|
+
signal?: AbortSignal;
|
|
47
|
+
/** Optional model family tag for cache scoping. Defaults to `qwen3-asr`. */
|
|
48
|
+
modelFamily?: string;
|
|
49
|
+
/** Optional BCP-47 language hint (e.g. `"en"`, `"zh"`). Backends may ignore. */
|
|
50
|
+
language?: string;
|
|
51
|
+
}
|
|
52
|
+
/** Backend response. The arbiter normalizes this back to a plain string for the model handler. */
|
|
53
|
+
export interface AsrResult {
|
|
54
|
+
/** Final transcript. Whitespace-trimmed. Never undefined; empty string is a real "no speech detected" result. */
|
|
55
|
+
text: string;
|
|
56
|
+
/** Optional per-segment timings. Backends that support diarization emit one segment per speaker turn. */
|
|
57
|
+
segments?: ReadonlyArray<{
|
|
58
|
+
text: string;
|
|
59
|
+
startMs: number;
|
|
60
|
+
endMs: number;
|
|
61
|
+
speaker?: string;
|
|
62
|
+
}>;
|
|
63
|
+
/** Optional Qwen2-BPE token ids — the fused build emits these so STT-finish token injection skips re-tokenization. */
|
|
64
|
+
tokens?: ReadonlyArray<number>;
|
|
65
|
+
/** Optional inference time in ms (wall clock, not GPU compute). */
|
|
66
|
+
inferenceTimeMs?: number;
|
|
67
|
+
/** True when the response came from the arbiter's content-hash cache. */
|
|
68
|
+
cacheHit?: boolean;
|
|
69
|
+
}
|
|
70
|
+
/** Arguments the arbiter passes to the loader. */
|
|
71
|
+
export interface AsrLoadArgs {
|
|
72
|
+
modelKey: string;
|
|
73
|
+
}
|
|
74
|
+
/** Backend contract. Every per-platform ASR runtime implements this. */
|
|
75
|
+
export interface AsrBackend {
|
|
76
|
+
/** Stable identifier for telemetry / errors (`"fused"`, `"coreml"`, `"aosp-ffi"`, ...). */
|
|
77
|
+
readonly id: string;
|
|
78
|
+
/**
|
|
79
|
+
* Whether this backend supports the request as-is. False → the arbiter
|
|
80
|
+
* throws `AsrBackendUnavailableError` with `unsupported_request`. Most
|
|
81
|
+
* backends return `true` for any non-empty PCM ≤ a hard length cap.
|
|
82
|
+
*/
|
|
83
|
+
supports(req: AsrRequest): boolean;
|
|
84
|
+
/** Run a transcription. The backend MUST honour `req.signal`. */
|
|
85
|
+
transcribe(req: AsrRequest): Promise<AsrResult>;
|
|
86
|
+
/** Release native resources held by this backend handle. */
|
|
87
|
+
dispose(): Promise<void>;
|
|
88
|
+
}
|
|
89
|
+
/**
 * Asynchronous loader supplied at registration time. The arbiter invokes it
 * on first use of a given modelKey.
 */
export type AsrBackendLoader = (modelKey: string) => Promise<AsrBackend>;
|
|
91
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["types.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;AAEH,oDAAoD;AACpD,MAAM,WAAW,UAAU;IAC1B;;;OAGG;IACH,GAAG,EAAE,YAAY,CAAC;IAClB,uEAAuE;IACvE,YAAY,EAAE,MAAM,CAAC;IACrB,sEAAsE;IACtE,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,4EAA4E;IAC5E,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,gFAAgF;IAChF,QAAQ,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,kGAAkG;AAClG,MAAM,WAAW,SAAS;IACzB,iHAAiH;IACjH,IAAI,EAAE,MAAM,CAAC;IACb,yGAAyG;IACzG,QAAQ,CAAC,EAAE,aAAa,CAAC;QACxB,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,EAAE,MAAM,CAAC;QAChB,KAAK,EAAE,MAAM,CAAC;QACd,OAAO,CAAC,EAAE,MAAM,CAAC;KACjB,CAAC,CAAC;IACH,sHAAsH;IACtH,MAAM,CAAC,EAAE,aAAa,CAAC,MAAM,CAAC,CAAC;IAC/B,mEAAmE;IACnE,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,yEAAyE;IACzE,QAAQ,CAAC,EAAE,OAAO,CAAC;CACnB;AAED,kDAAkD;AAClD,MAAM,WAAW,WAAW;IAC3B,QAAQ,EAAE,MAAM,CAAC;CACjB;AAED,wEAAwE;AACxE,MAAM,WAAW,UAAU;IAC1B,2FAA2F;IAC3F,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB;;;;OAIG;IACH,QAAQ,CAAC,GAAG,EAAE,UAAU,GAAG,OAAO,CAAC;IACnC,iEAAiE;IACjE,UAAU,CAAC,GAAG,EAAE,UAAU,GAAG,OAAO,CAAC,SAAS,CAAC,CAAC;IAChD,4DAA4D;IAC5D,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CACzB;AAED,qGAAqG;AACrG,MAAM,MAAM,gBAAgB,GAAG,CAAC,QAAQ,EAAE,MAAM,KAAK,OAAO,CAAC,UAAU,CAAC,CAAC"}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local ASR (transcription) types — mirrors the WS2 vision-describe and
|
|
3
|
+
* WS3 image-gen splits.
|
|
4
|
+
*
|
|
5
|
+
* Two layers live here:
|
|
6
|
+
*
|
|
7
|
+
* 1. The **request/result** contract every backend implements
|
|
8
|
+
* (`AsrRequest`, `AsrResult`). Callers pass mono 16 kHz fp32 PCM (or
|
|
9
|
+
* a sample rate the backend can resample from), backends return a
|
|
10
|
+
* transcript string plus optional token / segment metadata.
|
|
11
|
+
*
|
|
12
|
+
* 2. The **backend** interface (`AsrBackend`) the `MemoryArbiter`
|
|
13
|
+
* (WS1) registers as a capability handler. One backend for each
|
|
14
|
+
* per-platform path:
|
|
15
|
+
*
|
|
16
|
+
* - `fused` — `eliza_inference_asr_*` ABI (Qwen3-ASR
|
|
17
|
+
* inside the fused libelizainference). The sole on-device ASR
|
|
18
|
+
* runtime: Linux / macOS / Windows desktop, AOSP system app via
|
|
19
|
+
* the same fused build.
|
|
20
|
+
* - `coreml` — Capacitor bridge to a Core ML ASR
|
|
21
|
+
* model on iOS (unavailable until the bridge ships).
|
|
22
|
+
* - `aosp-ffi` — bun:ffi shim around the AOSP NDK
|
|
23
|
+
* Qwen-ASR JNI handle (unavailable until the AOSP fused
|
|
24
|
+
* ASR symbols are exported).
|
|
25
|
+
* - `fake` — deterministic in-process backend used by tests.
|
|
26
|
+
*
|
|
27
|
+
* Cache contract:
|
|
28
|
+
*
|
|
29
|
+
* The arbiter caches transcript text by **content hash** of the input PCM
|
|
30
|
+
* (sample-rate-normalized to 16 kHz mono). The cache namespace is
|
|
31
|
+
* `asr-transcripts` — distinct from the WS2 vision-embedding cache
|
|
32
|
+
* (`vision-projector-tokens`) and the WS3 image-gen request key space.
|
|
33
|
+
* Re-transcribing the same audio is a fast text return; the backend
|
|
34
|
+
* never re-runs.
|
|
35
|
+
*/
|
|
36
|
+
|
|
37
|
+
/**
 * Input accepted by the ASR capability handler.
 */
export interface AsrRequest {
	/**
	 * Mono 32-bit float PCM samples in the range [-1, 1]. When
	 * `sampleRateHz` is not 16000 the backend resamples to 16 kHz
	 * internally.
	 */
	pcm: Float32Array;
	/**
	 * Sample rate of `pcm`, in Hz. Common values: 16000, 24000, 48000.
	 */
	sampleRateHz: number;
	/**
	 * Abort signal supplied by the caller, if any. Backends MUST honour it.
	 */
	signal?: AbortSignal;
	/**
	 * Model family tag used for cache scoping. Defaults to `qwen3-asr`
	 * when omitted.
	 */
	modelFamily?: string;
	/**
	 * BCP-47 language hint (e.g. `"en"`, `"zh"`). Backends may ignore it.
	 */
	language?: string;
}
|
|
53
|
+
|
|
54
|
+
/**
 * What a backend hands back after transcribing a request. The arbiter
 * reduces this to a plain string before it reaches the model handler.
 */
export interface AsrResult {
	/**
	 * Final, whitespace-trimmed transcript. Always a string: an empty string
	 * is a genuine "no speech detected" outcome, never a stand-in for
	 * undefined.
	 */
	text: string;
	/**
	 * Per-segment timings, when available. Backends with diarization support
	 * emit one segment per speaker turn.
	 */
	segments?: readonly {
		text: string;
		startMs: number;
		endMs: number;
		speaker?: string;
	}[];
	/**
	 * Qwen2-BPE token ids, when available. The fused build emits these so
	 * STT-finish token injection can skip re-tokenization.
	 */
	tokens?: readonly number[];
	/** Wall-clock inference time in milliseconds (not GPU compute time). */
	inferenceTimeMs?: number;
	/** Set to true when the arbiter answered from its content-hash cache. */
	cacheHit?: boolean;
}
|
|
72
|
+
|
|
73
|
+
/**
 * Argument bag the arbiter passes to the loader.
 *
 * NOTE(review): `AsrBackendLoader` in this file is declared as taking a bare
 * `modelKey` string rather than this object — confirm which shape the
 * arbiter actually passes.
 */
export interface AsrLoadArgs {
	modelKey: string;
}
|
|
77
|
+
|
|
78
|
+
/**
 * Contract implemented by every per-platform ASR runtime.
 */
export interface AsrBackend {
	/**
	 * Stable identifier used in telemetry and errors
	 * (`"fused"`, `"coreml"`, `"aosp-ffi"`, ...).
	 */
	readonly id: string;
	/**
	 * Reports whether the backend can handle the request unchanged. When
	 * this returns false the arbiter throws `AsrBackendUnavailableError`
	 * with `unsupported_request`. Most backends answer `true` for any
	 * non-empty PCM ≤ a hard length cap.
	 */
	supports(req: AsrRequest): boolean;
	/**
	 * Runs a transcription. Implementations MUST honour `req.signal`.
	 */
	transcribe(req: AsrRequest): Promise<AsrResult>;
	/**
	 * Releases the native resources held by this backend handle.
	 */
	dispose(): Promise<void>;
}
|
|
93
|
+
|
|
94
|
+
/**
 * Asynchronous loader supplied at registration time. The arbiter invokes it
 * on first use of a given modelKey.
 */
export type AsrBackendLoader = (modelKey: string) => Promise<AsrBackend>;
|