@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +81 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +7 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +54 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1171 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +190 -0
- package/src/routes/local-inference-asr-route.ts +213 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +423 -0
- package/src/routes/local-inference-compat-routes.ts +782 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1398 -0
- package/src/runtime/index.d.ts +14 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +27 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/asr/errors.d.ts +21 -0
- package/src/services/asr/errors.d.ts.map +1 -0
- package/src/services/asr/errors.ts +50 -0
- package/src/services/asr/hash.d.ts +28 -0
- package/src/services/asr/hash.d.ts.map +1 -0
- package/src/services/asr/hash.ts +49 -0
- package/src/services/asr/index.d.ts +76 -0
- package/src/services/asr/index.d.ts.map +1 -0
- package/src/services/asr/index.ts +178 -0
- package/src/services/asr/types.d.ts +91 -0
- package/src/services/asr/types.d.ts.map +1 -0
- package/src/services/asr/types.ts +95 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +240 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +724 -0
- package/src/services/downloader.ts +899 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +534 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1891 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +183 -0
- package/src/services/hardware.ts +404 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +281 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +30 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +225 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +693 -0
- package/src/services/manifest/schema.d.ts +715 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +655 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +569 -0
- package/src/services/memory-arbiter.d.ts +343 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +1000 -0
- package/src/services/memory-monitor.d.ts +119 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +296 -0
- package/src/services/memory-pressure.d.ts +127 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +413 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +672 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +376 -0
- package/src/services/routing-policy.d.ts +55 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.ts +228 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +15 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/tts/errors.ts +46 -0
- package/src/services/tts/index.ts +214 -0
- package/src/services/tts/tts-audio-cache.ts +235 -0
- package/src/services/tts/types.ts +157 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +148 -0
- package/src/services/voice/embedding.ts +244 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +746 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2226 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +636 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +671 -0
- package/src/services/voice/ffi-bindings.ts +3050 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.ts +105 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +420 -0
- package/src/services/voice/voice-budget.ts +656 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Eliza-1 EOT scorer — reuses the already-loaded text model to compute
|
|
3
|
+
* P(`<|im_end|>` next | conversation_so_far) without shipping a separate
|
|
4
|
+
* detector ONNX.
|
|
5
|
+
*
|
|
6
|
+
* The runtime keeps a `LlamaModel` resident for chat generation. Voice
|
|
7
|
+
* EOT scoring is a single forward pass over the formatted Qwen chat
|
|
8
|
+
* prompt with the trailing `<|im_end|>` removed. `capacitor-llama`'s
|
|
9
|
+
* `LlamaContextSequence.controlledEvaluate()` returns the next-token
|
|
10
|
+
* probability distribution, so we simply read the entry for the
|
|
11
|
+
* `<|im_end|>` token id — no sampling loop, no KV-cache growth on the
|
|
12
|
+
* chat session.
|
|
13
|
+
*
|
|
14
|
+
* A dedicated `LlamaContext` is held just for this scorer so we do not
|
|
15
|
+
* fight the chat session pool for sequence slots. The context is small
|
|
16
|
+
* (single sequence, ≤512 tokens) and shares the model weights with the
|
|
17
|
+
* chat path; only the per-sequence KV cache is duplicated.
|
|
18
|
+
*
|
|
19
|
+
* Optionally a LoRA adapter is attached to that context — that is how
|
|
20
|
+
* a fine-tuned EOT head (trained per
|
|
21
|
+
* `packages/training/scripts/turn_detector/`) layers onto the same base
|
|
22
|
+
* weights without a separate GGUF.
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
import path from "node:path";
|
|
26
|
+
|
|
27
|
+
// `capacitor-llama` 3.18.1 surface we depend on. We avoid importing the
|
|
28
|
+
// type directly so the binding stays an optional peer dep — callers pass
|
|
29
|
+
// the model through a structural type.
|
|
30
|
+
/**
 * Structural view of the loaded text model. Only the two members the EOT
 * scorer calls are declared, so callers can hand in the real binding object
 * without this module importing its types.
 */
export interface LlamaModelLike {
  /**
   * Create an evaluation context over the model.
   *
   * `lora` is either a single adapter path or an explicit adapter list with
   * optional per-adapter scale.
   */
  createContext(options: {
    lora?: string | { adapters: { filePath: string; scale?: number }[] };
    flashAttention?: boolean;
    sequences?: number;
    contextSize?: number;
  }): Promise<LlamaContextLike>;

  /**
   * Turn text into token ids. The scorer passes `specialTokens = true` so
   * that chat-template markers such as `<|im_end|>` can resolve to a single
   * special token.
   */
  tokenize(text: string, specialTokens?: boolean): ReadonlyArray<number>;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Structural view of an evaluation context: it hands out sequences and can
 * be disposed when no longer needed.
 */
export interface LlamaContextLike {
  /** Release the context. */
  dispose(): Promise<void>;
  /** Obtain a sequence to evaluate tokens on. */
  getSequence(options?: object): LlamaContextSequenceLike;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Structural view of a context sequence, limited to the two operations the
 * EOT scorer performs on it.
 */
export interface LlamaContextSequenceLike {
  /**
   * Evaluate `input` and return one (possibly `undefined`) output slot per
   * input position. The scorer reads the slot at the index of the last input
   * token, which is the only position it requests probabilities for.
   */
  controlledEvaluate(
    input: Array<ControlledEvaluateInputLike>,
    options?: { evaluationPriority?: number },
  ): Promise<(ControlledEvaluateOutputLike | undefined)[]>;

  /** Reset the sequence; the scorer calls this before every evaluation. */
  clearHistory(): Promise<void>;
}
|
|
52
|
+
|
|
53
|
+
/**
 * One input position for `controlledEvaluate`: either a bare token id, or a
 * `[token, options]` pair that additionally asks for information about the
 * token that would follow it.
 */
export type ControlledEvaluateInputLike =
  | [
      token: number,
      options: {
        generateNext?: {
          confidence?: boolean;
          probabilities?: boolean;
        };
      },
    ]
  | number;
|
|
64
|
+
|
|
65
|
+
/**
 * Output slot for one evaluated input position. Every field under `next` is
 * optional; the scorer only consumes `next.probabilities`, looking up a
 * token id to get its probability.
 */
export interface ControlledEvaluateOutputLike {
  next: {
    /** Token id → probability for the next position, when requested. */
    probabilities?: Map<number, number>;
    confidence?: number;
    token?: number | null;
  };
}
|
|
72
|
+
|
|
73
|
+
/** Prefix that opens a user turn in the chat template used for EOT prompts. */
const IM_START_USER_PREFIX = "<|im_start|>user\n";

/** End-of-turn marker whose next-token probability the scorer reads. */
const IM_END_TOKEN = "<|im_end|>";
|
|
75
|
+
|
|
76
|
+
/** Construction options for `Eliza1EotScorer`. */
export interface Eliza1EotScorerOptions {
  /** Already-loaded text model (the eliza-1 drafter) the scorer evaluates on. */
  model: LlamaModelLike;
  /**
   * Absolute path of an optional EOT LoRA adapter. When present it is
   * attached to the scorer's dedicated context.
   */
  loraPath?: string;
  /** Scale for the adapter; ignored unless `loraPath` is set. */
  loraScale?: number;
  /** Size of the dedicated EOT context. Defaults to 512. */
  contextSize?: number;
  /**
   * Upper bound on prompt tokens kept (the most recent ones win).
   * Defaults to 128, the value LiveKit uses.
   */
  maxHistoryTokens?: number;
  /** Label reported in telemetry; derived from `loraPath` when omitted. */
  modelLabel?: string;
}
|
|
90
|
+
|
|
91
|
+
/** Outcome of a single `Eliza1EotScorer.score()` call. */
export interface Eliza1EotScoreResult {
  /** How many prompt tokens were evaluated. */
  promptTokens: number;
  /** Wall-clock latency of this scoring call, in milliseconds. */
  latencyMs: number;
  /** Probability, within [0, 1], that `<|im_end|>` is the next token. */
  probability: number;
}
|
|
99
|
+
|
|
100
|
+
/**
 * Stateful EOT scorer bound to a single loaded text model.
 *
 * The scorer lazily allocates its own dedicated context on the first
 * `score()` call and reuses it afterwards. Scoring calls are serialized on an
 * internal promise chain because they all share one sequence. Call
 * `dispose()` on shutdown; a later `score()` transparently re-initializes.
 *
 * Failure behaviour:
 * - A failed initialization is not cached: the next `score()` retries.
 * - `dispose()` waits for a pending initialization and for queued scoring
 *   calls before releasing the context, so the context is never torn down
 *   underneath a running evaluation and never leaked by a late init.
 */
export class Eliza1EotScorer {
  private readonly model: LlamaModelLike;
  private readonly loraPath: string | undefined;
  private readonly loraScale: number | undefined;
  private readonly maxHistoryTokens: number;
  private readonly contextSize: number;
  readonly modelLabel: string;

  private context: LlamaContextLike | null = null;
  private sequence: LlamaContextSequenceLike | null = null;
  private imEndTokenId: number | null = null;
  /** In-progress or completed initialization; reset to `null` on failure. */
  private initPromise: Promise<void> | null = null;
  /** Tail of the serialization chain; never rejects. */
  private inflight: Promise<unknown> = Promise.resolve();

  /**
   * @param options Model plus optional LoRA / sizing / label settings.
   *   Defaults: `maxHistoryTokens = 128`, `contextSize = 512`, and a label
   *   derived from the LoRA file name when a LoRA path is given.
   */
  constructor(options: Eliza1EotScorerOptions) {
    this.model = options.model;
    this.loraPath = options.loraPath;
    this.loraScale = options.loraScale;
    this.maxHistoryTokens = options.maxHistoryTokens ?? 128;
    this.contextSize = options.contextSize ?? 512;
    this.modelLabel =
      options.modelLabel ??
      (this.loraPath
        ? `eliza-1-drafter+eot-lora:${path.basename(this.loraPath)}`
        : "eliza-1-drafter");
  }

  /**
   * Score how likely the user's turn ends after `partialTranscript`.
   *
   * @param partialTranscript Transcript text of the current user turn.
   * @returns The `<|im_end|>` probability, the elapsed time (measured from
   *   just after tokenization, so it includes time spent queued behind other
   *   calls) and the number of prompt tokens evaluated.
   * @throws If initialization fails or the evaluation itself rejects.
   */
  async score(partialTranscript: string): Promise<Eliza1EotScoreResult> {
    await this.ensureReady();
    const sequence = this.sequence;
    const imEndId = this.imEndTokenId;
    if (!sequence || imEndId === null) {
      throw new Error("[voice] Eliza1EotScorer not initialized.");
    }

    const tokens = this.tokenizePrompt(partialTranscript);
    const start = performance.now();
    // Chain behind whatever is already queued: one sequence, one evaluation
    // at a time.
    const next = this.inflight.then(() =>
      this.runOnce(sequence, tokens, imEndId),
    );
    // Keep the chain alive even when this call fails.
    this.inflight = next.catch(() => undefined);
    const probability = await next;
    return {
      probability,
      latencyMs: performance.now() - start,
      promptTokens: tokens.length,
    };
  }

  /**
   * Release the dedicated context. Safe to call repeatedly and before the
   * scorer was ever used.
   */
  async dispose(): Promise<void> {
    // Let a pending initialization settle first; otherwise it would publish a
    // fresh context after we cleared the fields and that context would leak.
    const pendingInit = this.initPromise;
    if (pendingInit) await pendingInit.catch(() => undefined);

    // Drain queued evaluations, including any that were chained on while we
    // were waiting.
    let tail: Promise<unknown>;
    do {
      tail = this.inflight;
      await tail;
    } while (tail !== this.inflight);

    const ctx = this.context;
    this.context = null;
    this.sequence = null;
    this.imEndTokenId = null;
    this.initPromise = null;
    if (ctx) await ctx.dispose();
  }

  /** Initialize on first use; concurrent callers share one attempt. */
  private async ensureReady(): Promise<void> {
    if (this.context && this.sequence && this.imEndTokenId !== null) return;
    if (!this.initPromise) {
      const attempt: Promise<void> = this.initialize().catch(
        (err: unknown) => {
          // Do not cache the failure — allow the next call to retry.
          if (this.initPromise === attempt) this.initPromise = null;
          throw err;
        },
      );
      this.initPromise = attempt;
    }
    await this.initPromise;
  }

  /**
   * Resolve the `<|im_end|>` token id and create the dedicated context and
   * sequence. Instance state is only written once every step has succeeded.
   */
  private async initialize(): Promise<void> {
    const imEndIds = this.model.tokenize(IM_END_TOKEN, true);
    const [imEndId] = imEndIds;
    if (
      imEndIds.length !== 1 ||
      imEndId === undefined ||
      !Number.isInteger(imEndId)
    ) {
      throw new Error(
        `[voice] Eliza1EotScorer: model tokenizer did not resolve <|im_end|> to a single special token (got ${JSON.stringify(imEndIds)}). The base model must be Qwen-template compatible.`,
      );
    }

    const contextOptions: Parameters<LlamaModelLike["createContext"]>[0] = {
      contextSize: this.contextSize,
      sequences: 1,
      flashAttention: true,
    };
    if (this.loraPath) {
      contextOptions.lora = {
        adapters: [
          {
            filePath: this.loraPath,
            ...(this.loraScale !== undefined ? { scale: this.loraScale } : {}),
          },
        ],
      };
    }

    const context = await this.model.createContext(contextOptions);
    let sequence: LlamaContextSequenceLike;
    try {
      sequence = context.getSequence();
    } catch (err: unknown) {
      // Do not leak the freshly created context; surface the original error.
      await context.dispose().catch(() => undefined);
      throw err;
    }

    this.imEndTokenId = imEndId;
    this.context = context;
    this.sequence = sequence;
  }

  /**
   * Tokenize the formatted prompt and keep only the most recent tokens.
   * The window is `maxHistoryTokens`, additionally capped at `contextSize`
   * so a misconfigured history limit cannot overflow the dedicated context.
   */
  private tokenizePrompt(transcript: string): number[] {
    const formatted = formatEotPrompt(transcript);
    const ids = this.model.tokenize(formatted, true);
    const window = Math.min(this.maxHistoryTokens, this.contextSize);
    if (window <= 0) return [];
    if (ids.length <= window) return [...ids];
    return [...ids.slice(ids.length - window)];
  }

  /**
   * Run one evaluation and read the `<|im_end|>` probability for the
   * position after the last prompt token.
   *
   * @returns The probability clamped to [0, 1], or the neutral value `0.5`
   *   when there is nothing to evaluate or no usable probability came back.
   */
  private async runOnce(
    sequence: LlamaContextSequenceLike,
    tokens: number[],
    imEndId: number,
  ): Promise<number> {
    if (tokens.length === 0) return 0.5;
    await sequence.clearHistory();
    // Only the final token requests next-token probabilities.
    const input: ControlledEvaluateInputLike[] = tokens.map((tok, i) =>
      i === tokens.length - 1
        ? [tok, { generateNext: { probabilities: true } }]
        : tok,
    );
    const out = await sequence.controlledEvaluate(input);
    const last = out[tokens.length - 1];
    const probs = last?.next.probabilities;
    if (!probs) return 0.5;
    const p = probs.get(imEndId);
    if (typeof p !== "number" || !Number.isFinite(p)) return 0.5;
    return Math.max(0, Math.min(1, p));
  }
}
|
|
227
|
+
|
|
228
|
+
/**
 * Build the EOT scoring prompt for a partial transcript.
 *
 * The result is a single user turn in the Qwen chat template: the user-turn
 * prefix followed by the whitespace-trimmed transcript. The closing
 * `<|im_end|>` is deliberately left off, so the model's next-token
 * prediction at the end of the prompt is exactly the end-of-turn signal
 * being measured. No system prompt and no generation prefix are added,
 * mirroring the formatting LiveKit's turn detector uses; the caller
 * truncates to the most recent tokens afterwards.
 *
 * @param transcript Partial transcript of the current user turn.
 * @returns The formatted prompt string.
 */
export function formatEotPrompt(transcript: string): string {
  return IM_START_USER_PREFIX + transcript.trim();
}
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lazy embedding `llama-server` sidecar for Eliza-1 bundles.
|
|
3
|
+
*
|
|
4
|
+
* The route resolver decides which GGUF backs embeddings. This class owns the
|
|
5
|
+
* process boundary: validate the GGUF at construction, start the sidecar only
|
|
6
|
+
* on first non-empty `embed()`, and return Matryoshka-truncated vectors.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { type ChildProcess, spawn } from "node:child_process";
|
|
10
|
+
import { existsSync } from "node:fs";
|
|
11
|
+
import net from "node:net";
|
|
12
|
+
import os from "node:os";
|
|
13
|
+
import {
|
|
14
|
+
isValidEmbeddingDim,
|
|
15
|
+
type LocalEmbeddingRoute,
|
|
16
|
+
truncateMatryoshka,
|
|
17
|
+
} from "./embedding";
|
|
18
|
+
import { VoiceStartupError } from "./errors";
|
|
19
|
+
|
|
20
|
+
/** Settings for one embedding `llama-server` sidecar. */
interface EmbeddingServerConfig {
  /** GGUF passed to the sidecar via `-m`. In dedicated-region mode this is the `embedding/` file. */
  modelPath: string;
  /** Additional `llama-server` flags taken from the route (`--embeddings --pooling last`). */
  serverFlags: ReadonlyArray<string>;
  /**
   * GPU offload. A number is forwarded as `--n-gpu-layers` (`0` forces CPU);
   * `"auto"` or omission adds no flag, leaving the choice to `llama-server`.
   */
  gpuLayers?: "auto" | number;
  /** Threads for the embedding forward pass; defaults to the host's logical core count. */
  threads?: number;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Lazily-started `llama-server` sidecar that serves embeddings.
 *
 * The model path is validated at construction. The process is spawned on the
 * first non-empty `embed()` call, bound to a loopback port, and polled on
 * `/health` until ready. Concurrent callers share a single startup attempt
 * and all wait for readiness. A failed startup never leaves a child process
 * behind.
 */
export class EmbeddingServer {
  private readonly config: EmbeddingServerConfig;
  private child: ChildProcess | null = null;
  private baseUrl: string | null = null;
  /** Startup attempt currently in progress, if any. */
  private starting: Promise<void> | null = null;

  /**
   * @throws VoiceStartupError (`missing-bundle-root`) when `config.modelPath`
   *   does not exist.
   */
  constructor(config: EmbeddingServerConfig) {
    if (!existsSync(config.modelPath)) {
      throw new VoiceStartupError(
        "missing-bundle-root",
        `[embedding-server] model GGUF not found at ${config.modelPath}`,
      );
    }
    this.config = config;
  }

  /**
   * Whether a sidecar process exists and has not exited. This says nothing
   * about whether `/health` is ready yet.
   */
  isRunning(): boolean {
    const child = this.child;
    // A signal-terminated process keeps `exitCode === null`, so check both.
    return child !== null && child.exitCode === null && child.signalCode === null;
  }

  /**
   * Embed `texts` and return one vector per input, truncated to `dim`.
   *
   * @param texts Inputs to embed; an empty array returns `[]` without
   *   starting the sidecar.
   * @param dim Requested Matryoshka width (default 1024).
   * @throws Error on an invalid `dim`, a non-2xx response, a row-count
   *   mismatch, or a row without an embedding array; startup errors from the
   *   sidecar propagate as well.
   */
  async embed(texts: string[], dim = 1024): Promise<number[][]> {
    if (texts.length === 0) return [];
    if (!isValidEmbeddingDim(dim)) {
      throw new Error(`[embedding] dim ${dim} is not a valid Matryoshka width`);
    }
    await this.ensureStarted();
    const response = await fetch(`${this.baseUrl}/v1/embeddings`, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({ input: texts }),
    });
    if (!response.ok) {
      const body = await response.text().catch(() => "");
      throw new Error(
        `[embedding-server] /v1/embeddings returned ${response.status}: ${body.slice(0, 200)}`,
      );
    }
    const payload = (await response.json()) as {
      data?: Array<{ embedding?: number[] }>;
    };
    const rows = payload.data ?? [];
    if (rows.length !== texts.length) {
      throw new Error(
        `[embedding-server] expected ${texts.length} embedding rows, got ${rows.length}`,
      );
    }
    return rows.map((row, index) => {
      if (!Array.isArray(row.embedding)) {
        throw new Error(
          `[embedding-server] response row ${index} missing embedding vector`,
        );
      }
      return truncateMatryoshka(row.embedding, dim);
    });
  }

  /** Start the sidecar if needed and wait until it is ready. */
  private async ensureStarted(): Promise<void> {
    // Check the in-progress attempt first: during startup the child already
    // exists (so `isRunning()` is true) but `/health` is not ready yet.
    if (this.starting) {
      await this.starting;
      return;
    }
    if (this.isRunning()) return;
    const attempt: Promise<void> = this.start().finally(() => {
      if (this.starting === attempt) this.starting = null;
    });
    this.starting = attempt;
    await attempt;
  }

  /**
   * Spawn `llama-server` on a free loopback port and wait for readiness.
   * The binary comes from `ELIZA_LLAMA_SERVER_PATH`, falling back to
   * `llama-server` on `PATH`.
   */
  private async start(): Promise<void> {
    const port = await reserveTcpPort();
    const binary =
      process.env.ELIZA_LLAMA_SERVER_PATH?.trim() || "llama-server";
    const args = [
      "-m",
      this.config.modelPath,
      ...this.config.serverFlags,
      "--host",
      "127.0.0.1",
      "--port",
      String(port),
      "--threads",
      String(this.config.threads ?? Math.max(1, os.cpus().length)),
    ];
    if (typeof this.config.gpuLayers === "number") {
      args.push("--n-gpu-layers", String(this.config.gpuLayers));
    }

    const child = spawn(binary, args, {
      stdio: ["ignore", "pipe", "pipe"],
    });
    this.child = child;
    this.baseUrl = `http://127.0.0.1:${port}`;

    // Nothing consumes the sidecar's output; drain both pipes so the child
    // can never block on a full pipe buffer.
    child.stdout?.resume();
    child.stderr?.resume();

    // Without an 'error' listener a failed spawn (e.g. binary not found)
    // surfaces as an unhandled 'error' event. Record it and mark the child
    // as gone so the readiness loop stops immediately.
    const failure: { error: Error | null } = { error: null };
    child.on("error", (err: Error) => {
      failure.error ??= err;
      if (this.child === child) this.child = null;
    });

    child.once("exit", (code, signal) => {
      if (this.child === child) this.child = null;
      if (code !== 0 && code !== null) {
        console.warn(
          `[embedding-server] llama-server exited with code ${code}`,
        );
      } else if (signal) {
        console.warn(`[embedding-server] llama-server exited on ${signal}`);
      }
    });

    try {
      await this.waitUntilReady();
    } catch (err: unknown) {
      // Never leave a half-started sidecar behind.
      if (this.child === child) this.child = null;
      if (
        failure.error === null &&
        child.exitCode === null &&
        child.signalCode === null
      ) {
        child.kill("SIGKILL");
      }
      if (failure.error !== null) {
        throw new VoiceStartupError(
          "missing-fused-build",
          `[embedding-server] failed to launch ${binary}: ${failure.error.message}`,
        );
      }
      throw err;
    }
  }

  /**
   * Poll `/health` every 100 ms for up to 20 s.
   *
   * @throws VoiceStartupError (`missing-fused-build`) if the process goes
   *   away before becoming ready or the deadline passes.
   */
  private async waitUntilReady(): Promise<void> {
    const deadline = Date.now() + 20_000;
    while (Date.now() < deadline) {
      if (!this.isRunning()) {
        throw new VoiceStartupError(
          "missing-fused-build",
          "[embedding-server] llama-server exited before /health became ready",
        );
      }
      try {
        const response = await fetch(`${this.baseUrl}/health`);
        if (response.ok) return;
      } catch {
        // Server socket is not open yet.
      }
      await new Promise((resolve) => setTimeout(resolve, 100));
    }
    throw new VoiceStartupError(
      "missing-fused-build",
      "[embedding-server] timed out waiting for llama-server /health",
    );
  }

  /**
   * Stop the sidecar: send SIGTERM, then SIGKILL if it has not exited within
   * two seconds. Resolves once the process exited or the grace period ran
   * out. A no-op when nothing is running.
   */
  async stop(): Promise<void> {
    const child = this.child;
    if (!child) return;
    this.child = null;
    // Already gone: no 'exit' event will come, so do not wait for one.
    if (child.exitCode !== null || child.signalCode !== null) return;
    await new Promise<void>((resolve) => {
      const forceKill = setTimeout(() => {
        if (child.exitCode === null && child.signalCode === null) {
          child.kill("SIGKILL");
        }
        resolve();
      }, 2_000);
      forceKill.unref();
      child.once("exit", () => {
        clearTimeout(forceKill);
        resolve();
      });
      child.kill("SIGTERM");
    });
  }
}
|
|
171
|
+
|
|
172
|
+
/**
 * Create the `EmbeddingServer` that backs a resolved embedding route.
 *
 * The GGUF is chosen from the route's source: the text model for a
 * `pooled-text` source, otherwise the dedicated embedding model. The route's
 * server flags are passed through unchanged.
 *
 * @param route Resolved local-embedding route.
 * @param opts Optional GPU-offload and thread-count overrides.
 * @returns A not-yet-started `EmbeddingServer`.
 */
export function embeddingServerForRoute(
  route: LocalEmbeddingRoute,
  opts: { gpuLayers?: number | "auto"; threads?: number } = {},
): EmbeddingServer {
  const { source, serverFlags } = route;
  const { gpuLayers, threads } = opts;

  let modelPath: string;
  if (source.kind === "pooled-text") {
    modelPath = source.textModelPath;
  } else {
    modelPath = source.embeddingModelPath;
  }

  return new EmbeddingServer({ modelPath, serverFlags, gpuLayers, threads });
}
|
|
187
|
+
|
|
188
|
+
/**
 * Find a free loopback TCP port by binding a throwaway server to port 0 on
 * `127.0.0.1`, reading the assigned port, and closing the server again.
 *
 * @returns The port number that was assigned.
 * @throws The listen error, or a generic error when no address information
 *   is available.
 */
async function reserveTcpPort(): Promise<number> {
  const probe = net.createServer();
  return new Promise<number>((resolve, reject) => {
    const settle = (bound: ReturnType<typeof probe.address>): void => {
      if (bound !== null && typeof bound !== "string") {
        resolve(bound.port);
        return;
      }
      reject(new Error("[embedding-server] failed to reserve TCP port"));
    };

    probe.once("error", reject);
    probe.listen(0, "127.0.0.1", () => {
      // Capture the address before closing; it is reported once the close
      // has completed.
      const bound = probe.address();
      probe.close(() => settle(bound));
    });
  });
}
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local embedding wiring for Eliza-1 bundles.
|
|
3
|
+
*
|
|
4
|
+
* Per `packages/inference/AGENTS.md` §1:
|
|
5
|
+
* - On the `0_8b` and `2b` tiers the **embedding model IS the text backbone**,
|
|
6
|
+
* served with `--pooling last` — there is no separate `embedding/`
|
|
7
|
+
* GGUF and no duplicate parameters in the mobile/default tiers.
|
|
8
|
+
* - On `4b` and larger tiers, a dedicated
|
|
9
|
+
* `embedding/` GGUF region (Apache-2.0,
|
|
10
|
+
* 1024-dim Matryoshka, 32k ctx) is acquired lazily through the same
|
|
11
|
+
* engine / `SharedResourceRegistry`. **Do not collapse it to pooled
|
|
12
|
+
* text on the larger tiers** — that breaks the 1024-dim Matryoshka
|
|
13
|
+
* contract (B1's verdict).
|
|
14
|
+
*
|
|
15
|
+
* This module is a pure resolver: given a bundle root + tier id it
|
|
16
|
+
* describes *where* embeddings come from (the text GGUF with a pooling
|
|
17
|
+
* flag, or a separate region file) without doing any I/O beyond an
|
|
18
|
+
* `existsSync`. The engine consumes the descriptor to mount the region
|
|
19
|
+
* and the local-embedding route. It also owns the Matryoshka-truncation
|
|
20
|
+
* helper that callers / the vector store use to trade dimensionality for
|
|
21
|
+
* storage (see `EMBEDDING_MATRYOSHKA_DIMS` + `truncateMatryoshka`).
|
|
22
|
+
*/
|
|
23
|
+
import type { Eliza1TierId } from "../catalog";
|
|
24
|
+
/** Bundle-relative directory holding a dedicated embedding GGUF (larger tiers). */
|
|
25
|
+
export declare const EMBEDDING_DIR_REL_PATH = "embedding";
|
|
26
|
+
/** Full output dimensionality of the Eliza-1 embedding model. */
|
|
27
|
+
export declare const EMBEDDING_FULL_DIM: 1024;
|
|
28
|
+
/**
|
|
29
|
+
* Valid Matryoshka truncation points for the Eliza-1 embedding region. The model
|
|
30
|
+
* is trained so that the leading N components of the 1024-dim vector are
|
|
31
|
+
* themselves a usable embedding at these widths; quality degrades
|
|
32
|
+
* gracefully as N shrinks (see the tradeoff table in
|
|
33
|
+
* `reports/porting/2026-05-11/embedding-model-review.md`).
|
|
34
|
+
*
|
|
35
|
+
* 1024 (full) → 768 → 512 → 256 → 128 → 64. Smaller widths than 64 are
|
|
36
|
+
* not part of the published contract.
|
|
37
|
+
*/
|
|
38
|
+
export declare const EMBEDDING_MATRYOSHKA_DIMS: readonly number[];
|
|
39
|
+
/** Type-narrow guard for `EMBEDDING_MATRYOSHKA_DIMS`. */
|
|
40
|
+
export declare function isValidEmbeddingDim(dim: number): boolean;
|
|
41
|
+
/**
|
|
42
|
+
* Truncate a full 1024-dim embedding to one of the Matryoshka widths and
|
|
43
|
+
* L2-renormalize. Renormalization matters: the dedicated embedding outputs are
|
|
44
|
+
* unit-norm at 1024 dims, but the leading slice is *not* unit-norm, and
|
|
45
|
+
* downstream cosine-similarity / dot-product retrieval assumes unit
|
|
46
|
+
* vectors.
|
|
47
|
+
*
|
|
48
|
+
* Throws on an invalid `dim` (must be one of `EMBEDDING_MATRYOSHKA_DIMS`)
|
|
49
|
+
* or when `vec` is shorter than `dim` — no silent truncation-to-whatever
|
|
50
|
+
* or zero-padding (Commandment 8: don't hide a broken pipeline).
|
|
51
|
+
*/
|
|
52
|
+
export declare function truncateMatryoshka(vec: readonly number[], dim: number): number[];
|
|
53
|
+
export type LocalEmbeddingSource = {
|
|
54
|
+
/** `0_8b` / `2b`: reuse the text backbone GGUF; serve with `--pooling last`. */
|
|
55
|
+
readonly kind: "pooled-text";
|
|
56
|
+
readonly textModelPath: string;
|
|
57
|
+
readonly poolingType: "last";
|
|
58
|
+
} | {
|
|
59
|
+
/** Larger tiers: a dedicated `embedding/<name>.gguf` region. */
|
|
60
|
+
readonly kind: "dedicated-region";
|
|
61
|
+
readonly embeddingModelPath: string;
|
|
62
|
+
/** 1024-dim Matryoshka (the published Eliza-1 embedding contract). */
|
|
63
|
+
readonly dimensions: typeof EMBEDDING_FULL_DIM;
|
|
64
|
+
/**
|
|
65
|
+
* The dedicated model already ships a contrastive `last`-token
|
|
66
|
+
* pooling head — `--pooling last` is still passed so llama-server
|
|
67
|
+
* doesn't fall back to the GGUF's metadata default (which for a raw
|
|
68
|
+
* Qwen3 base is `mean`). The model's own pooling layer dominates;
|
|
69
|
+
* this just pins the read.
|
|
70
|
+
*/
|
|
71
|
+
readonly poolingType: "last";
|
|
72
|
+
};
|
|
73
|
+
/**
|
|
74
|
+
* Tiers whose embedding model is the text backbone with `--pooling last`
|
|
75
|
+
* (no separate GGUF). The default mobile tiers deliberately avoid duplicate
|
|
76
|
+
* parameters; larger tiers may use a dedicated embedding region.
|
|
77
|
+
*/
|
|
78
|
+
export declare const POOLED_TEXT_EMBEDDING_TIERS: ReadonlySet<Eliza1TierId>;
|
|
79
|
+
/**
|
|
80
|
+
* Resolve the embedding source for an activated Eliza-1 bundle.
|
|
81
|
+
*
|
|
82
|
+
* @param bundleRoot Bundle directory on disk.
|
|
83
|
+
* @param tierId The Eliza-1 tier id (`eliza-1-0_8b`, ...).
|
|
84
|
+
* @param textModelPath Absolute path of the activated text GGUF (needed for
|
|
85
|
+
* the `pooled-text` case).
|
|
86
|
+
*
|
|
87
|
+
* Hard-fails (AGENTS.md §3) when a larger tier is missing its
|
|
88
|
+
* `embedding/` region — no silent fallback to pooled text, which would
|
|
89
|
+
* regress dimensions from 1024 to whatever the text model emits.
|
|
90
|
+
*/
|
|
91
|
+
export declare function resolveLocalEmbeddingSource(args: {
|
|
92
|
+
bundleRoot: string;
|
|
93
|
+
tierId: Eliza1TierId;
|
|
94
|
+
textModelPath: string;
|
|
95
|
+
}): LocalEmbeddingSource;
|
|
96
|
+
/**
|
|
97
|
+
* Descriptor for the local-embedding route the engine exposes. The
|
|
98
|
+
* route's job is `text[] → number[dim][]`; the runtime mounts the source
|
|
99
|
+
* (pooled text or dedicated region) and forwards. Kept as a plain data
|
|
100
|
+
* shape so both the API layer and tests can assert it without standing up
|
|
101
|
+
* a server.
|
|
102
|
+
*/
|
|
103
|
+
export interface LocalEmbeddingRoute {
|
|
104
|
+
readonly tierId: Eliza1TierId;
|
|
105
|
+
readonly source: LocalEmbeddingSource;
|
|
106
|
+
/** Full output dimensionality the route produces before truncation. 1024 on every tier. */
|
|
107
|
+
readonly dimensions: typeof EMBEDDING_FULL_DIM;
|
|
108
|
+
/**
|
|
109
|
+
* Default Matryoshka width the route returns when a caller does not ask
|
|
110
|
+
* for a smaller `dim`. Defaults to 1024 (= `dimensions`) unless `buildLocalEmbeddingRoute` is given another `defaultDim` — callers/the vector
|
|
111
|
+
* store opt into a smaller width for storage savings.
|
|
112
|
+
*/
|
|
113
|
+
readonly defaultDim: number;
|
|
114
|
+
/** The Matryoshka widths a caller may request. */
|
|
115
|
+
readonly matryoshkaDims: readonly number[];
|
|
116
|
+
/**
|
|
117
|
+
* `llama-server` flags for the embedding server process — always
|
|
118
|
+
* `--embeddings --pooling last`. The embedding server is a lazily-started
|
|
119
|
+
* sidecar over the route's GGUF (the text backbone on `0_8b` / `2b`, the
|
|
120
|
+
* `embedding/` GGUF on larger tiers); see `embedding-server.ts`. The
|
|
121
|
+
* chat `llama-server` is left untouched (completions-only) — these flags
|
|
122
|
+
* do NOT go on it.
|
|
123
|
+
*/
|
|
124
|
+
readonly serverFlags: ReadonlyArray<string>;
|
|
125
|
+
}
|
|
126
|
+
export declare function buildLocalEmbeddingRoute(args: {
|
|
127
|
+
bundleRoot: string;
|
|
128
|
+
tierId: Eliza1TierId;
|
|
129
|
+
textModelPath: string;
|
|
130
|
+
/** Default output width; must be one of `EMBEDDING_MATRYOSHKA_DIMS`. Defaults to 1024. */
|
|
131
|
+
defaultDim?: number;
|
|
132
|
+
}): LocalEmbeddingRoute;
|
|
133
|
+
//# sourceMappingURL=embedding.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embedding.d.ts","sourceRoot":"","sources":["embedding.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAIH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAG/C,mFAAmF;AACnF,eAAO,MAAM,sBAAsB,cAAc,CAAC;AAElD,iEAAiE;AACjE,eAAO,MAAM,kBAAkB,EAAG,IAAa,CAAC;AAEhD;;;;;;;;;GASG;AACH,eAAO,MAAM,yBAAyB,EAAE,SAAS,MAAM,EAEtD,CAAC;AAEF,yDAAyD;AACzD,wBAAgB,mBAAmB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAExD;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,kBAAkB,CACjC,GAAG,EAAE,SAAS,MAAM,EAAE,EACtB,GAAG,EAAE,MAAM,GACT,MAAM,EAAE,CAiBV;AAYD,MAAM,MAAM,oBAAoB,GAC7B;IACA,gFAAgF;IAChF,QAAQ,CAAC,IAAI,EAAE,aAAa,CAAC;IAC7B,QAAQ,CAAC,aAAa,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAC5B,GACD;IACA,gEAAgE;IAChE,QAAQ,CAAC,IAAI,EAAE,kBAAkB,CAAC;IAClC,QAAQ,CAAC,kBAAkB,EAAE,MAAM,CAAC;IACpC,sEAAsE;IACtE,QAAQ,CAAC,UAAU,EAAE,OAAO,kBAAkB,CAAC;IAC/C;;;;;;OAMG;IACH,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAC5B,CAAC;AAaL;;;;GAIG;AACH,eAAO,MAAM,2BAA2B,EAAE,WAAW,CAAC,YAAY,CAGhE,CAAC;AAEH;;;;;;;;;;;GAWG;AACH,wBAAgB,2BAA2B,CAAC,IAAI,EAAE;IACjD,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,YAAY,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;CACtB,GAAG,oBAAoB,CA4BvB;AAED;;;;;;GAMG;AACH,MAAM,WAAW,mBAAmB;IACnC,QAAQ,CAAC,MAAM,EAAE,YAAY,CAAC;IAC9B,QAAQ,CAAC,MAAM,EAAE,oBAAoB,CAAC;IACtC,2FAA2F;IAC3F,QAAQ,CAAC,UAAU,EAAE,OAAO,kBAAkB,CAAC;IAC/C;;;;OAIG;IACH,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,kDAAkD;IAClD,QAAQ,CAAC,cAAc,EAAE,SAAS,MAAM,EAAE,CAAC;IAC3C;;;;;;;OAOG;IACH,QAAQ,CAAC,WAAW,EAAE,aAAa,CAAC,MAAM,CAAC,CAAC;CAC5C;AAED,wBAAgB,wBAAwB,CAAC,IAAI,EAAE;IAC9C,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,YAAY,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;IACtB,0FAA0F;IAC1F,UAAU,CAAC,EAAE,MAAM,CAAC;CACpB,GAAG,mBAAmB,CAoBtB"}
|