@vellumai/assistant 0.6.3 → 0.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +273 -10
- package/Dockerfile +2 -3
- package/bun.lock +5 -13
- package/docs/backup-troubleshooting.md +52 -0
- package/docs/browser-use-architecture-phase2.md +174 -0
- package/docs/stt-provider-onboarding.md +120 -0
- package/knip.json +12 -2
- package/node_modules/@vellumai/ces-contracts/bun.lock +8 -6
- package/node_modules/@vellumai/ces-contracts/package.json +3 -3
- package/openapi.yaml +982 -72
- package/package.json +4 -6
- package/scripts/generate-openapi.ts +0 -1
- package/scripts/test.sh +73 -18
- package/src/__tests__/agent-image-optimize.test.ts +28 -0
- package/src/__tests__/agent-loop.test.ts +123 -0
- package/src/__tests__/anthropic-provider.test.ts +263 -10
- package/src/__tests__/auto-analysis-end-to-end.test.ts +550 -0
- package/src/__tests__/auto-analysis-prompt.test.ts +50 -0
- package/src/__tests__/browser-fill-credential.test.ts +11 -0
- package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +2 -2
- package/src/__tests__/browser-skill-endstate.test.ts +31 -7
- package/src/__tests__/btw-routes.test.ts +7 -0
- package/src/__tests__/call-controller.test.ts +581 -20
- package/src/__tests__/catalog-files.test.ts +138 -0
- package/src/__tests__/channel-invite-transport.test.ts +2 -2
- package/src/__tests__/channel-readiness-routes.test.ts +16 -20
- package/src/__tests__/channel-readiness-service.test.ts +12 -7
- package/src/__tests__/checker.test.ts +157 -10
- package/src/__tests__/clawhub-files.test.ts +347 -0
- package/src/__tests__/commit-message-enrichment-service.test.ts +36 -19
- package/src/__tests__/config-analysis.test.ts +100 -0
- package/src/__tests__/config-schema.test.ts +1013 -66
- package/src/__tests__/config-watcher-cleanup-throttle.test.ts +339 -0
- package/src/__tests__/config-watcher.test.ts +43 -8
- package/src/__tests__/contact-store-user-file.test.ts +512 -0
- package/src/__tests__/contacts-write.test.ts +197 -0
- package/src/__tests__/context-window-manager.test.ts +88 -0
- package/src/__tests__/conversation-abort-tool-results.test.ts +2 -0
- package/src/__tests__/conversation-agent-loop-overflow.test.ts +1 -0
- package/src/__tests__/conversation-agent-loop.test.ts +98 -2
- package/src/__tests__/conversation-confirmation-signals.test.ts +135 -0
- package/src/__tests__/conversation-error.test.ts +70 -0
- package/src/__tests__/conversation-history-web-search.test.ts +11 -4
- package/src/__tests__/conversation-init.benchmark.test.ts +6 -1
- package/src/__tests__/conversation-launcher-skill-regression.test.ts +51 -0
- package/src/__tests__/conversation-list-source.test.ts +145 -0
- package/src/__tests__/conversation-pre-run-repair.test.ts +2 -0
- package/src/__tests__/conversation-provider-retry-repair.test.ts +2 -0
- package/src/__tests__/conversation-queue.test.ts +901 -60
- package/src/__tests__/conversation-routes-disk-view.test.ts +270 -0
- package/src/__tests__/conversation-runtime-assembly.test.ts +55 -0
- package/src/__tests__/conversation-skill-tools.test.ts +7 -4
- package/src/__tests__/conversation-slash-commands.test.ts +33 -0
- package/src/__tests__/conversation-slash-queue.test.ts +89 -18
- package/src/__tests__/conversation-slash-unknown.test.ts +2 -0
- package/src/__tests__/conversation-tool-setup-batch-authorized.test.ts +226 -0
- package/src/__tests__/conversation-workspace-injection.test.ts +2 -0
- package/src/__tests__/conversation-workspace-tool-tracking.test.ts +2 -0
- package/src/__tests__/credential-health-service.test.ts +352 -0
- package/src/__tests__/credential-security-invariants.test.ts +5 -3
- package/src/__tests__/credential-vault-unit.test.ts +379 -3
- package/src/__tests__/credentials-cli.test.ts +40 -16
- package/src/__tests__/cross-provider-web-search.test.ts +146 -35
- package/src/__tests__/deterministic-verification-control-plane.test.ts +10 -1
- package/src/__tests__/device-id.test.ts +112 -0
- package/src/__tests__/docker-signing-key-bootstrap.test.ts +167 -4
- package/src/__tests__/dynamic-skill-workflow-prompt.test.ts +1 -3
- package/src/__tests__/email-html-renderer.test.ts +71 -0
- package/src/__tests__/email-invite-adapter.test.ts +36 -32
- package/src/__tests__/emit-event-signal.test.ts +71 -0
- package/src/__tests__/extension-id-sync-guard.test.ts +75 -8
- package/src/__tests__/fixtures/mock-chrome-extension.ts +11 -0
- package/src/__tests__/gateway-only-enforcement.test.ts +206 -1
- package/src/__tests__/gateway-only-guard.test.ts +0 -1
- package/src/__tests__/gemini-provider.test.ts +64 -0
- package/src/__tests__/get-skill-detail-audit.test.ts +325 -0
- package/src/__tests__/gmail-archive-fallback.test.ts +193 -0
- package/src/__tests__/gmail-archive-gate.test.ts +246 -0
- package/src/__tests__/gmail-preferences.test.ts +117 -0
- package/src/__tests__/headless-browser-interactions.test.ts +43 -0
- package/src/__tests__/headless-browser-mode.test.ts +614 -0
- package/src/__tests__/headless-browser-navigate.test.ts +142 -5
- package/src/__tests__/headless-browser-read-tools.test.ts +11 -0
- package/src/__tests__/headless-browser-snapshot.test.ts +10 -0
- package/src/__tests__/heartbeat-service.test.ts +70 -17
- package/src/__tests__/home-state-routes.test.ts +162 -0
- package/src/__tests__/host-bash-proxy.test.ts +0 -5
- package/src/__tests__/host-browser-e2e-cloud.test.ts +138 -4
- package/src/__tests__/host-browser-e2e-self-hosted.test.ts +4 -4
- package/src/__tests__/host-browser-ws-events-e2e.test.ts +103 -0
- package/src/__tests__/host-cu-proxy.test.ts +0 -5
- package/src/__tests__/identity-intro-cache.test.ts +40 -10
- package/src/__tests__/init-feature-flag-overrides.test.ts +38 -112
- package/src/__tests__/jobs-store-upsert-debounced.test.ts +141 -0
- package/src/__tests__/llm-context-normalization.test.ts +488 -0
- package/src/__tests__/llm-context-route-provider.test.ts +86 -5
- package/src/__tests__/llm-usage-store.test.ts +363 -0
- package/src/__tests__/media-stream-output.test.ts +555 -0
- package/src/__tests__/media-stream-parser.test.ts +374 -0
- package/src/__tests__/media-stream-server-integration.test.ts +1234 -0
- package/src/__tests__/media-stream-stt-session.test.ts +588 -0
- package/src/__tests__/media-turn-detector.test.ts +440 -0
- package/src/__tests__/message-queue.test.ts +125 -0
- package/src/__tests__/migration-export-http.test.ts +6 -6
- package/src/__tests__/migration-import-commit-http.test.ts +8 -6
- package/src/__tests__/migration-import-preflight-http.test.ts +6 -5
- package/src/__tests__/migration-validate-http.test.ts +3 -3
- package/src/__tests__/mock-gateway-ipc.ts +151 -0
- package/src/__tests__/model-intents.test.ts +2 -2
- package/src/__tests__/oauth-apps-routes.test.ts +1 -0
- package/src/__tests__/oauth-cli.test.ts +2 -0
- package/src/__tests__/oauth-connect-orchestrator.test.ts +2 -0
- package/src/__tests__/oauth-provider-serializer.test.ts +1 -0
- package/src/__tests__/oauth-providers-routes.test.ts +2 -0
- package/src/__tests__/oauth-store.test.ts +85 -0
- package/src/__tests__/oauth2-gateway-transport.test.ts +249 -6
- package/src/__tests__/onboarding-template-contract.test.ts +6 -13
- package/src/__tests__/openai-provider.test.ts +176 -0
- package/src/__tests__/openai-responses-cutover-guard.test.ts +184 -0
- package/src/__tests__/openai-responses-provider.test.ts +1105 -0
- package/src/__tests__/openrouter-token-estimation.test.ts +100 -0
- package/src/__tests__/outlook-unsubscribe.test.ts +31 -2
- package/src/__tests__/persona-resolver.test.ts +251 -0
- package/src/__tests__/platform-bash-auto-approve.test.ts +4 -0
- package/src/__tests__/platform.test.ts +92 -1
- package/src/__tests__/post-turn-tool-result-truncation.test.ts +47 -0
- package/src/__tests__/prechat-onboarding-contract.test.ts +267 -0
- package/src/__tests__/pricing.test.ts +174 -0
- package/src/__tests__/qdrant-manager.test.ts +29 -8
- package/src/__tests__/regenerate-fire-and-forget-trace.test.ts +194 -0
- package/src/__tests__/relationship-state-contract.test.ts +175 -0
- package/src/__tests__/relay-server.test.ts +423 -5
- package/src/__tests__/search-skills-unified.test.ts +118 -0
- package/src/__tests__/secret-scanner-executor.test.ts +4 -0
- package/src/__tests__/secure-keys.test.ts +107 -0
- package/src/__tests__/send-endpoint-busy.test.ts +5 -1
- package/src/__tests__/sequence-store.test.ts +1 -1
- package/src/__tests__/server-history-render.test.ts +49 -0
- package/src/__tests__/settings-routes.test.ts +201 -0
- package/src/__tests__/skill-load-feature-flag.test.ts +1 -0
- package/src/__tests__/skills-file-content-endpoint.test.ts +276 -145
- package/src/__tests__/skills-files-catalog-fallback.test.ts +381 -93
- package/src/__tests__/skills.test.ts +5 -2
- package/src/__tests__/skillssh-files.test.ts +446 -0
- package/src/__tests__/slack-block-formatting.test.ts +110 -0
- package/src/__tests__/slack-channel-config.test.ts +564 -1
- package/src/__tests__/stt-catalog-parity.test.ts +282 -0
- package/src/__tests__/stt-stream-session.test.ts +535 -0
- package/src/__tests__/system-prompt.test.ts +112 -26
- package/src/__tests__/telephony-stt-routing.test.ts +329 -0
- package/src/__tests__/terminal-tools.test.ts +18 -7
- package/src/__tests__/test-preload.ts +18 -0
- package/src/__tests__/test-support/browser-skill-harness.ts +4 -1
- package/src/__tests__/tool-executor-lifecycle-events.test.ts +9 -5
- package/src/__tests__/tool-executor-shell-integration.test.ts +4 -0
- package/src/__tests__/tool-executor.test.ts +33 -24
- package/src/__tests__/tool-result-truncation.test.ts +36 -0
- package/src/__tests__/trust-store.test.ts +7 -1
- package/src/__tests__/trusted-contact-approval-notifier.test.ts +1 -1
- package/src/__tests__/tts-catalog-parity.test.ts +345 -0
- package/src/__tests__/twilio-routes-twiml.test.ts +512 -114
- package/src/__tests__/twilio-routes.test.ts +376 -0
- package/src/__tests__/unicode.test.ts +293 -0
- package/src/__tests__/update-bulletin-format.test.ts +59 -0
- package/src/__tests__/update-bulletin.test.ts +206 -5
- package/src/__tests__/usage-routes.test.ts +25 -4
- package/src/__tests__/user-reference.test.ts +46 -61
- package/src/__tests__/verification-control-plane-policy.test.ts +4 -0
- package/src/__tests__/voice-config-update.test.ts +403 -0
- package/src/__tests__/voice-quality.test.ts +434 -19
- package/src/__tests__/workspace-heartbeat-service.test.ts +7 -0
- package/src/__tests__/workspace-migration-033-stt-service-explicit-config.test.ts +547 -0
- package/src/__tests__/workspace-migration-034-remove-calls-voice-transcription-provider.test.ts +596 -0
- package/src/__tests__/workspace-migration-drop-user-md.test.ts +368 -0
- package/src/__tests__/workspace-migration-meets.test.ts +244 -0
- package/src/__tests__/workspace-migration-seed-device-id.test.ts +14 -20
- package/src/__tests__/workspace-policy.test.ts +2 -0
- package/src/agent/image-optimize.ts +24 -12
- package/src/agent/loop.ts +43 -3
- package/src/backup/__tests__/backup-key.test.ts +152 -0
- package/src/backup/__tests__/backup-worker.test.ts +767 -0
- package/src/backup/__tests__/list-snapshots.test.ts +87 -0
- package/src/backup/__tests__/local-writer.test.ts +218 -0
- package/src/backup/__tests__/offsite-writer.test.ts +641 -0
- package/src/backup/__tests__/paths.test.ts +300 -0
- package/src/backup/__tests__/restore.test.ts +498 -0
- package/src/backup/__tests__/snapshot-lock.test.ts +352 -0
- package/src/backup/__tests__/stream-crypt.test.ts +228 -0
- package/src/backup/backup-key.ts +137 -0
- package/src/backup/backup-worker.ts +459 -0
- package/src/backup/list-snapshots.ts +147 -0
- package/src/backup/local-writer.ts +133 -0
- package/src/backup/offsite-writer.ts +222 -0
- package/src/backup/paths.ts +226 -0
- package/src/backup/restore.ts +322 -0
- package/src/backup/snapshot-lock.ts +431 -0
- package/src/backup/stream-crypt.ts +263 -0
- package/src/bundler/package-resolver.ts +4 -0
- package/src/calls/audio-store.ts +11 -5
- package/src/calls/call-controller.ts +226 -71
- package/src/calls/call-domain.ts +9 -0
- package/src/calls/call-speech-output.ts +190 -0
- package/src/calls/call-transport.ts +77 -0
- package/src/calls/media-stream-audio-transcode.ts +173 -0
- package/src/calls/media-stream-output.ts +660 -0
- package/src/calls/media-stream-parser.ts +300 -0
- package/src/calls/media-stream-protocol.ts +166 -0
- package/src/calls/media-stream-server.ts +592 -0
- package/src/calls/media-stream-stt-session.ts +460 -0
- package/src/calls/media-turn-detector.ts +230 -0
- package/src/calls/relay-server.ts +90 -75
- package/src/calls/resolve-call-tts-provider.ts +136 -0
- package/src/calls/telephony-stt-routing.ts +145 -0
- package/src/calls/tts-call-strategy.ts +161 -0
- package/src/calls/tts-text-sanitizer.ts +32 -16
- package/src/calls/twilio-routes.ts +281 -17
- package/src/calls/voice-quality.ts +78 -35
- package/src/calls/voice-session-bridge.ts +8 -1
- package/src/channels/types.ts +16 -0
- package/src/cli/__tests__/run-assistant-command.ts +11 -1
- package/src/cli/commands/__tests__/backup.test.ts +1165 -0
- package/src/cli/commands/__tests__/domain-register.test.ts +234 -0
- package/src/cli/commands/__tests__/domain-status.test.ts +132 -0
- package/src/cli/commands/__tests__/email-attachment.test.ts +422 -0
- package/src/cli/commands/__tests__/email-download.test.ts +16 -1
- package/src/cli/commands/__tests__/email-list.test.ts +22 -4
- package/src/cli/commands/__tests__/email-register.test.ts +4 -4
- package/src/cli/commands/__tests__/email-send.test.ts +37 -4
- package/src/cli/commands/__tests__/email-status.test.ts +5 -1
- package/src/cli/commands/__tests__/email-unregister.test.ts +34 -5
- package/src/cli/commands/backup.ts +993 -0
- package/src/cli/commands/conversations.ts +77 -0
- package/src/cli/commands/credentials.ts +0 -1
- package/src/cli/commands/domain.ts +210 -0
- package/src/cli/commands/email.ts +255 -3
- package/src/cli/commands/oauth/__tests__/connect.test.ts +12 -0
- package/src/cli/commands/oauth/__tests__/providers-delete.test.ts +1 -0
- package/src/cli/commands/oauth/__tests__/providers-register.test.ts +1 -0
- package/src/cli/commands/oauth/__tests__/providers-update.test.ts +1 -0
- package/src/cli/commands/oauth/mode.ts +12 -3
- package/src/cli/commands/oauth/providers.ts +15 -0
- package/src/cli/commands/oauth/shared.ts +2 -1
- package/src/cli/commands/platform/__tests__/callback-routes-list.test.ts +4 -9
- package/src/cli/commands/platform/__tests__/connect.test.ts +6 -0
- package/src/cli/commands/platform/__tests__/disconnect.test.ts +7 -1
- package/src/cli/commands/platform/__tests__/status.test.ts +6 -0
- package/src/cli/program.ts +30 -4
- package/src/config/__tests__/backup-schema.test.ts +134 -0
- package/src/config/assistant-feature-flags.ts +61 -62
- package/src/config/bundled-skills/app-builder/references/CUSTOM_ROUTES.md +37 -1
- package/src/config/bundled-skills/browser/SKILL.md +30 -5
- package/src/config/bundled-skills/browser/TOOLS.json +123 -0
- package/src/config/bundled-skills/browser/tools/browser-attach.ts +12 -0
- package/src/config/bundled-skills/browser/tools/browser-detach.ts +12 -0
- package/src/config/bundled-skills/browser/tools/browser-status.ts +12 -0
- package/src/config/bundled-skills/browser/tools/browser-wait-for-download.ts +17 -0
- package/src/config/bundled-skills/contacts/SKILL.md +2 -2
- package/src/config/bundled-skills/gmail/SKILL.md +53 -7
- package/src/config/bundled-skills/gmail/TOOLS.json +33 -3
- package/src/config/bundled-skills/gmail/tools/gmail-archive.ts +116 -9
- package/src/config/bundled-skills/gmail/tools/gmail-outreach-scan.ts +138 -11
- package/src/config/bundled-skills/gmail/tools/gmail-preferences-tool.ts +59 -0
- package/src/config/bundled-skills/gmail/tools/gmail-preferences.ts +82 -0
- package/src/config/bundled-skills/gmail/tools/gmail-sender-digest.ts +113 -17
- package/src/config/bundled-skills/gmail/tools/gmail-unsubscribe.ts +2 -2
- package/src/config/bundled-skills/media-processing/SKILL.md +3 -9
- package/src/config/bundled-skills/media-processing/TOOLS.json +1 -6
- package/src/config/bundled-skills/media-processing/__tests__/audio-transcribe.test.ts +125 -0
- package/src/config/bundled-skills/media-processing/__tests__/extract-keyframes.test.ts +181 -0
- package/src/config/bundled-skills/media-processing/__tests__/preprocess-audio.test.ts +141 -0
- package/src/config/bundled-skills/media-processing/services/audio-transcribe.ts +32 -87
- package/src/config/bundled-skills/media-processing/services/preprocess.ts +8 -4
- package/src/config/bundled-skills/media-processing/tools/extract-keyframes.ts +0 -10
- package/src/config/bundled-skills/messaging/SKILL.md +3 -3
- package/src/config/bundled-skills/messaging/tools/messaging-archive-by-sender.ts +2 -2
- package/src/config/bundled-skills/outlook/SKILL.md +2 -2
- package/src/config/bundled-skills/outlook/tools/outlook-unsubscribe.ts +2 -2
- package/src/config/bundled-skills/phone-calls/SKILL.md +2 -2
- package/src/config/bundled-skills/phone-calls/references/CONFIG.md +27 -18
- package/src/config/bundled-skills/phone-calls/references/TROUBLESHOOTING.md +3 -3
- package/src/config/bundled-skills/settings/TOOLS.json +3 -3
- package/src/config/bundled-skills/settings/tools/voice-config-update.ts +26 -22
- package/src/config/bundled-skills/slack/SKILL.md +1 -0
- package/src/config/bundled-skills/transcribe/SKILL.md +9 -14
- package/src/config/bundled-skills/transcribe/TOOLS.json +2 -7
- package/src/config/bundled-skills/transcribe/tools/transcribe-media.test.ts +256 -0
- package/src/config/bundled-skills/transcribe/tools/transcribe-media.ts +38 -188
- package/src/config/bundled-tool-registry.ts +8 -0
- package/src/config/env-registry.ts +24 -0
- package/src/config/env.ts +34 -10
- package/src/config/feature-flag-registry.json +46 -14
- package/src/config/loader.ts +26 -12
- package/src/config/schema.ts +35 -10
- package/src/config/schemas/__tests__/stt.test.ts +43 -0
- package/src/config/schemas/analysis.ts +51 -0
- package/src/config/schemas/backup.ts +72 -0
- package/src/config/schemas/calls.ts +1 -26
- package/src/config/schemas/elevenlabs.ts +0 -59
- package/src/config/schemas/filing.ts +47 -7
- package/src/config/schemas/heartbeat.ts +27 -5
- package/src/config/schemas/host-browser.ts +47 -1
- package/src/config/schemas/inference.ts +1 -1
- package/src/config/schemas/memory-lifecycle.ts +14 -2
- package/src/config/schemas/services.ts +44 -0
- package/src/config/schemas/stt.ts +59 -0
- package/src/config/schemas/tts.ts +230 -0
- package/src/config/schemas/updates.ts +14 -0
- package/src/config/skills.ts +4 -0
- package/src/config/types.ts +4 -0
- package/src/contacts/contact-store.ts +56 -11
- package/src/contacts/contacts-write.ts +38 -1
- package/src/context/post-turn-tool-result-truncation.ts +3 -2
- package/src/context/tool-result-truncation.ts +2 -1
- package/src/context/window-manager.ts +45 -12
- package/src/credential-execution/executable-discovery.ts +12 -2
- package/src/credential-execution/process-manager.ts +33 -2
- package/src/credential-health/credential-health-service.ts +366 -0
- package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +324 -0
- package/src/daemon/__tests__/conversation-surfaces-launch.test.ts +497 -0
- package/src/daemon/__tests__/conversation-tool-setup.test.ts +17 -8
- package/src/daemon/__tests__/lifecycle-startup-ordering.test.ts +127 -0
- package/src/daemon/config-watcher.ts +99 -5
- package/src/daemon/conversation-agent-loop-handlers.ts +6 -0
- package/src/daemon/conversation-agent-loop.ts +101 -24
- package/src/daemon/conversation-error.ts +11 -0
- package/src/daemon/conversation-history.ts +40 -6
- package/src/daemon/conversation-launch.ts +220 -0
- package/src/daemon/conversation-lifecycle.ts +59 -9
- package/src/daemon/conversation-messaging.ts +37 -3
- package/src/daemon/conversation-notifiers.ts +5 -0
- package/src/daemon/conversation-process.ts +581 -19
- package/src/daemon/conversation-queue-manager.ts +24 -0
- package/src/daemon/conversation-runtime-assembly.ts +11 -1
- package/src/daemon/conversation-slash.ts +36 -0
- package/src/daemon/conversation-surfaces.ts +94 -4
- package/src/daemon/conversation-tool-setup.ts +25 -0
- package/src/daemon/conversation-usage.ts +7 -4
- package/src/daemon/conversation.ts +86 -28
- package/src/daemon/handlers/config-slack-channel.ts +269 -94
- package/src/daemon/handlers/conversations.ts +4 -1
- package/src/daemon/handlers/shared.ts +22 -0
- package/src/daemon/handlers/skills.ts +321 -77
- package/src/daemon/host-browser-proxy.ts +2 -1
- package/src/daemon/lifecycle.ts +122 -25
- package/src/daemon/message-protocol.ts +6 -0
- package/src/daemon/message-types/conversations.ts +34 -1
- package/src/daemon/message-types/home.ts +40 -0
- package/src/daemon/message-types/meet.ts +143 -0
- package/src/daemon/message-types/messages.ts +14 -0
- package/src/daemon/message-types/schedules.ts +34 -2
- package/src/daemon/message-types/skills.ts +16 -0
- package/src/daemon/message-types/surfaces.ts +2 -0
- package/src/daemon/server.ts +347 -2
- package/src/daemon/shutdown-handlers.ts +32 -4
- package/src/daemon/shutdown-registry.ts +40 -0
- package/src/daemon/tool-side-effects.ts +9 -0
- package/src/email/html-renderer.ts +76 -0
- package/src/heartbeat/heartbeat-service.ts +93 -7
- package/src/home/__tests__/assistant-feed-authoring.test.ts +156 -0
- package/src/home/__tests__/emit-feed-event.test.ts +169 -0
- package/src/home/__tests__/feed-scheduler.test.ts +194 -0
- package/src/home/__tests__/feed-types.test.ts +275 -0
- package/src/home/__tests__/feed-writer.test.ts +688 -0
- package/src/home/__tests__/phase5-exit-criteria.test.ts +212 -0
- package/src/home/__tests__/platform-gmail-digest.test.ts +222 -0
- package/src/home/__tests__/progress-formula.test.ts +213 -0
- package/src/home/__tests__/relationship-state-writer.test.ts +740 -0
- package/src/home/__tests__/rollup-producer.test.ts +398 -0
- package/src/home/assistant-feed-authoring.ts +124 -0
- package/src/home/emit-feed-event.ts +158 -0
- package/src/home/feed-scheduler.ts +247 -0
- package/src/home/feed-types.ts +181 -0
- package/src/home/feed-writer.ts +469 -0
- package/src/home/platform-gmail-digest.ts +163 -0
- package/src/home/progress-formula.ts +86 -0
- package/src/home/relationship-state-writer.ts +824 -0
- package/src/home/relationship-state.ts +143 -0
- package/src/home/rollup-producer.ts +384 -0
- package/src/hooks/runner.ts +7 -0
- package/src/inbound/platform-callback-registration.ts +12 -3
- package/src/inbound/public-ingress-urls.ts +12 -0
- package/src/instrument.ts +1 -1
- package/src/ipc/__tests__/cli-ipc.test.ts +200 -0
- package/src/ipc/cli-client.ts +151 -0
- package/src/ipc/cli-server.ts +234 -0
- package/src/ipc/gateway-client.ts +180 -0
- package/src/ipc/routes/index.ts +5 -0
- package/src/ipc/routes/wake-conversation.ts +19 -0
- package/src/memory/__tests__/auto-analysis-enqueue.test.ts +356 -0
- package/src/memory/__tests__/auto-analysis-guard.test.ts +57 -0
- package/src/memory/__tests__/conversation-analyze-job.test.ts +232 -0
- package/src/memory/__tests__/find-analysis-conversation.test.ts +196 -0
- package/src/memory/app-store.ts +1 -1
- package/src/memory/attachments-store.ts +70 -0
- package/src/memory/auto-analysis-enqueue.ts +127 -0
- package/src/memory/auto-analysis-guard.ts +27 -0
- package/src/memory/cleanup-schedule-state.ts +37 -0
- package/src/memory/conversation-analyze-job.ts +73 -0
- package/src/memory/conversation-crud.ts +99 -0
- package/src/memory/conversation-disk-view.ts +7 -0
- package/src/memory/conversation-group-migration.ts +34 -2
- package/src/memory/conversation-queries.ts +6 -5
- package/src/memory/db-init.ts +6 -0
- package/src/memory/db-maintenance.ts +108 -0
- package/src/memory/db.ts +1 -0
- package/src/memory/graph/conversation-graph-memory.ts +15 -0
- package/src/memory/graph/extraction.test.ts +23 -0
- package/src/memory/graph/extraction.ts +8 -0
- package/src/memory/graph/retriever.ts +27 -18
- package/src/memory/graph/scoring.test.ts +186 -0
- package/src/memory/graph/scoring.ts +31 -1
- package/src/memory/graph/tools.ts +1 -1
- package/src/memory/group-crud.ts +6 -1
- package/src/memory/indexer.ts +95 -16
- package/src/memory/job-handlers/cleanup.ts +11 -8
- package/src/memory/job-handlers/conversation-starters.ts +16 -10
- package/src/memory/jobs-store.ts +64 -4
- package/src/memory/jobs-worker.ts +22 -9
- package/src/memory/llm-usage-store.ts +92 -56
- package/src/memory/migrations/219-oauth-providers-token-exchange-body-format.ts +15 -0
- package/src/memory/migrations/220-normalize-user-file-by-principal.ts +190 -0
- package/src/memory/migrations/221-conversations-archived-at.ts +16 -0
- package/src/memory/migrations/index.ts +6 -0
- package/src/memory/migrations/registry.ts +8 -0
- package/src/memory/qdrant-manager.ts +43 -16
- package/src/memory/schema/conversations.ts +2 -0
- package/src/memory/schema/oauth.ts +3 -0
- package/src/memory/usage-buckets.ts +396 -0
- package/src/messaging/providers/gmail/client.ts +57 -6
- package/src/messaging/providers/slack/__tests__/adapter-token-routing.test.ts +282 -0
- package/src/messaging/providers/slack/adapter.ts +143 -38
- package/src/messaging/providers/slack/client.ts +16 -0
- package/src/messaging/providers/slack/types.ts +4 -0
- package/src/notifications/decision-engine.ts +3 -3
- package/src/notifications/signal.ts +5 -0
- package/src/oauth/__tests__/identity-verifier.test.ts +1 -0
- package/src/oauth/byo-connection.test.ts +18 -1
- package/src/oauth/byo-connection.ts +3 -1
- package/src/oauth/connect-orchestrator.ts +2 -0
- package/src/oauth/connection-resolver.ts +6 -2
- package/src/oauth/connection.ts +2 -0
- package/src/oauth/oauth-store.ts +9 -0
- package/src/oauth/platform-connection.test.ts +98 -0
- package/src/oauth/platform-connection.ts +52 -31
- package/src/oauth/seed-providers.ts +7 -0
- package/src/permissions/checker.ts +16 -6
- package/src/permissions/defaults.ts +49 -1
- package/src/permissions/trust-store.ts +3 -3
- package/src/permissions/workspace-policy.ts +3 -0
- package/src/platform/client.test.ts +10 -0
- package/src/platform/sync-identity.ts +129 -0
- package/src/prompts/persona-resolver.ts +126 -2
- package/src/prompts/system-prompt.ts +59 -18
- package/src/prompts/templates/BOOTSTRAP.md +5 -5
- package/src/prompts/templates/SOUL.md +3 -1
- package/src/prompts/templates/UPDATES.md +12 -0
- package/src/prompts/templates/channels/slack.md +20 -0
- package/src/prompts/update-bulletin-format.ts +26 -9
- package/src/prompts/update-bulletin.ts +34 -23
- package/src/prompts/user-reference.ts +20 -17
- package/src/providers/__tests__/provider-secret-catalog.test.ts +42 -0
- package/src/providers/anthropic/client.ts +157 -61
- package/src/providers/fireworks/client.ts +2 -2
- package/src/providers/gemini/client.ts +9 -1
- package/src/providers/model-catalog.ts +6 -0
- package/src/providers/model-intents.ts +4 -4
- package/src/providers/ollama/client.ts +2 -2
- package/src/providers/openai/chat-completions-provider.ts +474 -0
- package/src/providers/openai/client.ts +25 -440
- package/src/providers/openai/responses-provider.ts +502 -0
- package/src/providers/openrouter/client.ts +101 -4
- package/src/providers/provider-secret-catalog.ts +139 -0
- package/src/providers/registry.ts +2 -2
- package/src/providers/retry.ts +14 -3
- package/src/providers/speech-to-text/__tests__/provider-catalog.test.ts +251 -0
- package/src/providers/speech-to-text/__tests__/resolve.test.ts +828 -0
- package/src/providers/speech-to-text/deepgram-realtime.test.ts +980 -0
- package/src/providers/speech-to-text/deepgram-realtime.ts +767 -0
- package/src/providers/speech-to-text/deepgram.test.ts +332 -0
- package/src/providers/speech-to-text/deepgram.ts +115 -0
- package/src/providers/speech-to-text/google-gemini-live-stream.test.ts +743 -0
- package/src/providers/speech-to-text/google-gemini-live-stream.ts +625 -0
- package/src/providers/speech-to-text/google-gemini.test.ts +226 -0
- package/src/providers/speech-to-text/google-gemini.ts +101 -0
- package/src/providers/speech-to-text/openai-whisper-stream.test.ts +564 -0
- package/src/providers/speech-to-text/openai-whisper-stream.ts +381 -0
- package/src/providers/speech-to-text/openai-whisper.test.ts +1 -37
- package/src/providers/speech-to-text/openai-whisper.ts +63 -33
- package/src/providers/speech-to-text/provider-catalog.ts +306 -0
- package/src/providers/speech-to-text/resolve.ts +386 -6
- package/src/providers/types.ts +9 -0
- package/src/runtime/AGENTS.md +43 -1
- package/src/runtime/__tests__/agent-wake.test.ts +831 -0
- package/src/runtime/__tests__/runtime-mode.test.ts +62 -0
- package/src/runtime/__tests__/slack-block-formatting.test.ts +481 -0
- package/src/runtime/agent-wake.ts +512 -0
- package/src/runtime/auth/__tests__/route-policy.test.ts +40 -0
- package/src/runtime/auth/route-policy.ts +30 -5
- package/src/runtime/auth/token-service.ts +56 -1
- package/src/runtime/btw-sidechain.ts +2 -0
- package/src/runtime/capability-tokens.ts +10 -10
- package/src/runtime/channel-invite-transport.ts +1 -1
- package/src/runtime/channel-invite-transports/email.ts +14 -6
- package/src/runtime/channel-readiness-service.ts +12 -22
- package/src/runtime/chrome-extension-registry.ts +38 -2
- package/src/runtime/http-server.ts +395 -10
- package/src/runtime/http-types.ts +6 -2
- package/src/runtime/migrations/__tests__/vbundle-import-credentials.test.ts +36 -0
- package/src/runtime/migrations/__tests__/vbundle-legacy-user-md.test.ts +360 -0
- package/src/runtime/migrations/migration-transport.ts +1 -0
- package/src/runtime/migrations/migration-wizard.ts +1 -0
- package/src/runtime/migrations/vbundle-import-analyzer.ts +77 -1
- package/src/runtime/migrations/vbundle-importer.ts +34 -0
- package/src/runtime/pending-interactions.ts +0 -11
- package/src/runtime/routes/__tests__/backup-routes.test.ts +967 -0
- package/src/runtime/routes/__tests__/home-feed-routes.test.ts +507 -0
- package/src/runtime/routes/__tests__/migration-import-credential-filter.test.ts +208 -0
- package/src/runtime/routes/__tests__/stt-routes.test.ts +406 -0
- package/src/runtime/routes/__tests__/tts-routes.test.ts +474 -0
- package/src/runtime/routes/__tests__/user-route-dispatcher.test.ts +148 -17
- package/src/runtime/routes/app-management-routes.ts +12 -18
- package/src/runtime/routes/attachment-routes.test.ts +9 -3
- package/src/runtime/routes/attachment-routes.ts +216 -17
- package/src/runtime/routes/backup-routes.ts +519 -0
- package/src/runtime/routes/browser-extension-pair-routes.ts +82 -23
- package/src/runtime/routes/btw-routes.ts +8 -6
- package/src/runtime/routes/contact-routes.test.ts +298 -0
- package/src/runtime/routes/contact-routes.ts +132 -5
- package/src/runtime/routes/conversation-analysis-routes.ts +22 -142
- package/src/runtime/routes/conversation-management-routes.ts +115 -0
- package/src/runtime/routes/conversation-routes.ts +367 -146
- package/src/runtime/routes/filing-routes.ts +93 -0
- package/src/runtime/routes/home-feed-routes.ts +334 -0
- package/src/runtime/routes/home-state-routes.ts +138 -0
- package/src/runtime/routes/host-browser-routes.ts +3 -14
- package/src/runtime/routes/identity-intro-cache.ts +7 -3
- package/src/runtime/routes/identity-routes.ts +3 -17
- package/src/runtime/routes/inbound-stages/transcribe-audio.test.ts +46 -39
- package/src/runtime/routes/inbound-stages/transcribe-audio.ts +15 -15
- package/src/runtime/routes/integrations/slack/__tests__/channel.test.ts +137 -0
- package/src/runtime/routes/integrations/slack/__tests__/share.test.ts +179 -0
- package/src/runtime/routes/integrations/slack/channel.ts +11 -3
- package/src/runtime/routes/integrations/slack/share.ts +45 -7
- package/src/runtime/routes/llm-context-normalization.ts +303 -0
- package/src/runtime/routes/memory-item-routes.test.ts +3 -2
- package/src/runtime/routes/migration-routes.ts +40 -5
- package/src/runtime/routes/settings-routes.ts +22 -5
- package/src/runtime/routes/skills-routes.ts +76 -7
- package/src/runtime/routes/stt-routes.ts +233 -0
- package/src/runtime/routes/surface-action-routes.ts +41 -2
- package/src/runtime/routes/tts-routes.ts +108 -24
- package/src/runtime/routes/usage-routes.ts +30 -2
- package/src/runtime/routes/user-route-dispatcher.ts +50 -5
- package/src/runtime/routes/user-routes.ts +13 -1
- package/src/runtime/routes/work-items-routes.ts +8 -1
- package/src/runtime/runtime-mode.ts +33 -0
- package/src/runtime/services/__tests__/analyze-conversation.test.ts +444 -0
- package/src/runtime/services/__tests__/analyze-deps-singleton.test.ts +67 -0
- package/src/runtime/services/__tests__/auto-analysis-prompt.test.ts +53 -0
- package/src/runtime/services/__tests__/manual-analysis-prompt.test.ts +41 -0
- package/src/runtime/services/analyze-conversation.ts +344 -0
- package/src/runtime/services/analyze-deps-singleton.ts +32 -0
- package/src/runtime/services/auto-analysis-prompt.ts +55 -0
- package/src/runtime/skill-route-registry.ts +49 -0
- package/src/runtime/slack-block-formatting.ts +437 -10
- package/src/schedule/scheduler.ts +50 -0
- package/src/security/oauth2.ts +26 -4
- package/src/security/secure-keys.ts +25 -2
- package/src/security/token-manager.ts +8 -0
- package/src/sequence/engine.ts +23 -0
- package/src/sequence/types.ts +1 -1
- package/src/skills/catalog-files.ts +64 -2
- package/src/skills/category-inference.ts +122 -0
- package/src/skills/clawhub-files.ts +213 -0
- package/src/skills/clawhub.ts +84 -23
- package/src/skills/skill-file-provider.ts +40 -0
- package/src/skills/skillssh-files.ts +395 -0
- package/src/skills/skillssh-registry.ts +4 -4
- package/src/stt/__tests__/daemon-batch-transcriber.test.ts +392 -0
- package/src/stt/__tests__/types.test.ts +89 -0
- package/src/stt/daemon-batch-transcriber.ts +195 -0
- package/src/stt/stt-stream-session.ts +499 -0
- package/src/stt/types.ts +330 -0
- package/src/stt/wav-encoder.test.ts +373 -0
- package/src/stt/wav-encoder.ts +175 -0
- package/src/subagent/manager.ts +38 -14
- package/src/tools/browser/__tests__/browser-mode.test.ts +119 -0
- package/src/tools/browser/__tests__/browser-status.test.ts +123 -0
- package/src/tools/browser/browser-execution.ts +1163 -23
- package/src/tools/browser/browser-manager.ts +45 -0
- package/src/tools/browser/browser-mode-constants.ts +12 -0
- package/src/tools/browser/browser-mode.ts +92 -0
- package/src/tools/browser/browser-status-constants.ts +33 -0
- package/src/tools/browser/cdp-client/__tests__/cdp-inspect-client.test.ts +393 -0
- package/src/tools/browser/cdp-client/__tests__/extension-cdp-client.test.ts +29 -0
- package/src/tools/browser/cdp-client/__tests__/factory.test.ts +1648 -32
- package/src/tools/browser/cdp-client/cdp-inspect/__tests__/discovery.test.ts +264 -0
- package/src/tools/browser/cdp-client/cdp-inspect/discovery.ts +183 -17
- package/src/tools/browser/cdp-client/cdp-inspect-client.ts +254 -21
- package/src/tools/browser/cdp-client/errors.ts +15 -0
- package/src/tools/browser/cdp-client/extension-cdp-client.ts +39 -16
- package/src/tools/browser/cdp-client/factory.ts +797 -87
- package/src/tools/browser/cdp-client/index.ts +16 -2
- package/src/tools/browser/cdp-client/types.ts +68 -0
- package/src/tools/credentials/vault.ts +35 -6
- package/src/tools/network/web-fetch.ts +5 -2
- package/src/tools/network/web-search.ts +5 -2
- package/src/tools/shared/shell-output.ts +3 -1
- package/src/tools/side-effects.ts +2 -0
- package/src/tools/skills/sandbox-runner.ts +3 -2
- package/src/tools/terminal/safe-env.ts +10 -2
- package/src/tools/terminal/shell.ts +15 -4
- package/src/tools/tool-manifest.ts +21 -0
- package/src/tools/types.ts +17 -0
- package/src/tools/ui-surface/definitions.ts +6 -1
- package/src/tts/__tests__/provider-adapters.test.ts +834 -0
- package/src/tts/__tests__/provider-catalog-consistency.test.ts +196 -0
- package/src/tts/__tests__/provider-catalog.test.ts +183 -0
- package/src/tts/__tests__/provider-registry.test.ts +90 -0
- package/src/tts/provider-catalog.ts +201 -0
- package/src/tts/provider-registry.ts +73 -0
- package/src/tts/providers/deepgram-provider.ts +219 -0
- package/src/tts/providers/elevenlabs-provider.ts +211 -0
- package/src/tts/providers/fish-audio-provider.ts +183 -0
- package/src/tts/providers/index.ts +42 -0
- package/src/tts/providers/register-builtins.ts +130 -0
- package/src/tts/synthesize-text.ts +110 -0
- package/src/tts/tts-config-resolver.ts +78 -0
- package/src/tts/types.ts +153 -0
- package/src/types/onboarding-context.ts +7 -0
- package/src/util/abort-reasons.ts +58 -0
- package/src/util/device-id.ts +32 -16
- package/src/util/errors.ts +9 -1
- package/src/util/platform.ts +54 -10
- package/src/util/pricing.ts +66 -3
- package/src/util/spawn.ts +1 -1
- package/src/util/truncate.ts +4 -2
- package/src/util/unicode.ts +201 -0
- package/src/version.ts +19 -24
- package/src/watcher/engine.ts +23 -0
- package/src/watcher/watcher-store.ts +31 -0
- package/src/workspace/migrations/003-seed-device-id.ts +9 -3
- package/src/workspace/migrations/017-seed-persona-dirs.ts +68 -4
- package/src/workspace/migrations/029-seed-pkb.ts +1 -1
- package/src/workspace/migrations/031-drop-user-md.ts +317 -0
- package/src/workspace/migrations/031-llm-log-retention-zero-to-null.ts +73 -0
- package/src/workspace/migrations/032-tts-provider-unification.ts +227 -0
- package/src/workspace/migrations/033-stt-service-explicit-config.ts +122 -0
- package/src/workspace/migrations/034-remove-calls-voice-transcription-provider.ts +215 -0
- package/src/workspace/migrations/035-seed-slack-channel-persona.ts +50 -0
- package/src/workspace/migrations/036-update-pkb-index-bar.ts +37 -0
- package/src/workspace/migrations/037-create-meets-dir.ts +61 -0
- package/src/workspace/migrations/registry.ts +16 -0
- package/src/workspace/top-level-renderer.ts +13 -1
- package/src/workspace/turn-commit.ts +31 -0
- package/src/__tests__/email-cli.test.ts +0 -297
- package/src/__tests__/email-service-config-fallback.test.ts +0 -102
- package/src/cli/commands/browser-relay.ts +0 -466
- package/src/email/guardrails.ts +0 -221
- package/src/email/provider.ts +0 -117
- package/src/email/providers/agentmail.ts +0 -361
- package/src/email/providers/index.ts +0 -65
- package/src/email/service.ts +0 -384
- package/src/email/types.ts +0 -126
- package/src/prompts/templates/USER.md +0 -13
- package/src/providers/speech-to-text/types.ts +0 -17
- package/src/runtime/routes/browser-cdp-routes.ts +0 -229
|
@@ -0,0 +1,460 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* STT session module for media-stream call ingestion.
|
|
3
|
+
*
|
|
4
|
+
* Consumes segmented audio turns (produced by {@link MediaTurnDetector})
|
|
5
|
+
* and invokes the PR-1 telephony STT capability resolver to transcribe
|
|
6
|
+
* them via the configured `services.stt` provider.
|
|
7
|
+
*
|
|
8
|
+
* This module is **integration-neutral** — it exposes callback hooks
|
|
9
|
+
* (`onSpeechStart`, `onTranscriptFinal`, `onDtmf`, `onStop`) and is
|
|
10
|
+
* not wired to any active call ingress path. A future media-stream
|
|
11
|
+
* call adapter PR will instantiate and connect it.
|
|
12
|
+
*
|
|
13
|
+
* Error handling:
|
|
14
|
+
* - When the telephony resolver returns a non-supported status, the
|
|
15
|
+
* session reports the failure through `onError` and skips transcription for that turn (the session itself stays alive).
|
|
16
|
+
* - Individual turn transcription failures (timeouts, provider errors)
|
|
17
|
+
* are reported through `onError` without tearing down the session.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import {
|
|
21
|
+
resolveTelephonySttCapability,
|
|
22
|
+
type TelephonySttCapability,
|
|
23
|
+
} from "../providers/speech-to-text/resolve.js";
|
|
24
|
+
import { resolveBatchTranscriber } from "../providers/speech-to-text/resolve.js";
|
|
25
|
+
import { normalizeSttError } from "../stt/daemon-batch-transcriber.js";
|
|
26
|
+
import type { SttCallContextHints } from "../stt/types.js";
|
|
27
|
+
import { getLogger } from "../util/logger.js";
|
|
28
|
+
import { parseMediaStreamFrame } from "./media-stream-parser.js";
|
|
29
|
+
import type {
|
|
30
|
+
MediaStreamMediaEvent,
|
|
31
|
+
MediaStreamStartEvent,
|
|
32
|
+
} from "./media-stream-protocol.js";
|
|
33
|
+
import {
|
|
34
|
+
MediaTurnDetector,
|
|
35
|
+
type TurnDetectorConfig,
|
|
36
|
+
} from "./media-turn-detector.js";
|
|
37
|
+
|
|
38
|
+
const log = getLogger("media-stt-session");
|
|
39
|
+
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
// Configuration
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
|
|
44
|
+
/**
 * Construction-time options for {@link MediaStreamSttSession}.
 *
 * Every field is optional; the session constructor accepts `{}`.
 */
export interface MediaStreamSttSessionConfig {
  /**
   * Overrides for the turn detector thresholds. Passed through unchanged
   * as the first argument of the `MediaTurnDetector` the session creates.
   */
  turnDetector?: TurnDetectorConfig;

  /**
   * Per-request transcription timeout in milliseconds. Default: 10_000.
   * When the timeout elapses the session aborts the request's
   * `AbortSignal`.
   */
  transcriptionTimeoutMs?: number;

  /**
   * Optional call-context hints forwarded to the STT provider (handed to
   * the batch transcriber as `callContext` on every request).
   */
  callContextHints?: SttCallContextHints;
}
|
|
54
|
+
|
|
55
|
+
const DEFAULT_TRANSCRIPTION_TIMEOUT_MS = 10_000;
|
|
56
|
+
|
|
57
|
+
// ---------------------------------------------------------------------------
|
|
58
|
+
// Callback hooks
|
|
59
|
+
// ---------------------------------------------------------------------------
|
|
60
|
+
|
|
61
|
+
/**
 * Hooks through which {@link MediaStreamSttSession} reports what it
 * observes on the stream. Every hook is optional; absent hooks are
 * simply not invoked.
 */
export interface MediaStreamSttSessionCallbacks {
  /** Called when the turn detector transitions to active (first speech-bearing chunk). */
  onSpeechStart?: () => void;

  /**
   * Called when a completed turn has been transcribed successfully.
   *
   * @param text - The transcribed text exactly as returned by the batch
   *   transcriber (the session does not post-process it). It is the empty
   *   string when the turn ended with no buffered audio.
   * @param durationMs - Approximate duration of the audio turn, as
   *   reported by the turn detector.
   */
  onTranscriptFinal?: (text: string, durationMs: number) => void;

  /**
   * Called when a DTMF digit is received from Twilio.
   *
   * @param digit - The `dtmf.digit` value of the parsed `dtmf` event.
   */
  onDtmf?: (digit: string) => void;

  /**
   * Called when the media stream stops. The session force-ends any
   * active turn first, but the transcription of that final turn is
   * asynchronous, so `onTranscriptFinal` / `onError` for it may arrive
   * after this hook.
   */
  onStop?: () => void;

  /**
   * Called when an error occurs (provider error, timeout, no-provider, etc.).
   *
   * @param category - A structured error category. The session passes
   *   either the non-supported capability status, the category produced
   *   by `normalizeSttError`, or the literal `"unconfigured"` when no
   *   batch transcriber is available.
   * @param message - Human-readable description.
   */
  onError?: (category: string, message: string) => void;
}
|
|
91
|
+
|
|
92
|
+
// ---------------------------------------------------------------------------
|
|
93
|
+
// Session
|
|
94
|
+
// ---------------------------------------------------------------------------
|
|
95
|
+
|
|
96
|
+
/**
 * Speech-to-text session for one media stream.
 *
 * Raw WebSocket frames are fed in through {@link handleMessage}. Inbound
 * audio is buffered per turn (turn boundaries come from a
 * `MediaTurnDetector`), and each completed turn is sent to the batch
 * transcriber of the configured STT provider. Results and failures are
 * reported exclusively through the callbacks supplied at construction.
 *
 * Turns are transcribed independently; a new turn may complete while an
 * earlier transcription is still in flight.
 */
export class MediaStreamSttSession {
  private readonly config: MediaStreamSttSessionConfig;
  private readonly callbacks: MediaStreamSttSessionCallbacks;
  private readonly turnDetector: MediaTurnDetector;
  private readonly transcriptionTimeoutMs: number;

  /** Buffer of base64-encoded audio payloads for the current turn. */
  private currentTurnChunks: string[] = [];

  /** Stream metadata from the `start` event. */
  private streamSid: string | null = null;
  private callSid: string | null = null;
  private encoding: string | null = null;

  /** Whether the session has been disposed. */
  private disposed = false;

  /**
   * Capability snapshot. Resolved eagerly on the `start` event, or lazily
   * on the first turn end when no `start` event was seen.
   */
  private capabilityPromise: Promise<TelephonySttCapability> | null = null;

  /**
   * Abort controllers of every transcription request currently in flight.
   * A set (rather than a single slot) is required because turns can
   * overlap; dispose() must be able to cancel all of them.
   */
  private readonly inFlightTranscriptionAborts = new Set<AbortController>();

  /**
   * @param config - Optional tuning (turn detector thresholds,
   *   transcription timeout, call-context hints).
   * @param callbacks - Optional hooks that receive session events.
   */
  constructor(
    config: MediaStreamSttSessionConfig = {},
    callbacks: MediaStreamSttSessionCallbacks = {},
  ) {
    this.config = config;
    this.callbacks = callbacks;
    this.transcriptionTimeoutMs =
      config.transcriptionTimeoutMs ?? DEFAULT_TRANSCRIPTION_TIMEOUT_MS;

    this.turnDetector = new MediaTurnDetector(config.turnDetector, {
      onTurnStart: () => {
        // Clear inter-turn silence that accumulated while idle so each
        // transcription request contains only speech-relevant chunks.
        this.currentTurnChunks = [];
        this.callbacks.onSpeechStart?.();
      },
      onTurnEnd: (reason, durationMs) => {
        // Fire-and-forget: the detector callback is synchronous.
        void this.handleTurnEnd(reason, durationMs);
      },
    });
  }

  /**
   * Feed a raw WebSocket message into the session. The message is parsed,
   * validated, and routed to the appropriate handler. Malformed frames
   * are logged at debug level and dropped; messages received after
   * {@link dispose} are ignored.
   *
   * @param raw - The raw text frame received from the media stream.
   */
  handleMessage(raw: string): void {
    if (this.disposed) return;

    const result = parseMediaStreamFrame(raw);
    if (!result.ok) {
      log.debug({ error: result.error }, "Dropped malformed media frame");
      return;
    }

    const event = result.event;
    switch (event.event) {
      case "start":
        this.handleStart(event);
        break;
      case "media":
        this.handleMedia(event);
        break;
      case "dtmf":
        this.callbacks.onDtmf?.(event.dtmf.digit);
        break;
      case "mark":
        // Marks are informational — no action needed in the STT session.
        break;
      case "stop":
        this.handleStop();
        break;
    }
  }

  /**
   * Dispose of the session: abort every in-flight transcription, dispose
   * the turn detector, and drop buffered audio. After this call no
   * further callbacks are invoked for pending work.
   */
  dispose(): void {
    this.disposed = true;
    for (const controller of this.inFlightTranscriptionAborts) {
      controller.abort();
    }
    this.inFlightTranscriptionAborts.clear();
    this.turnDetector.dispose();
    this.currentTurnChunks = [];
  }

  // ── Event handlers ─────────────────────────────────────────────────

  /** Record stream metadata and start resolving the STT capability. */
  private handleStart(event: MediaStreamStartEvent): void {
    this.streamSid = event.streamSid;
    this.callSid = event.start.callSid;
    this.encoding = event.start.mediaFormat.encoding;

    log.info(
      {
        streamSid: this.streamSid,
        callSid: this.callSid,
        encoding: this.encoding,
        sampleRate: event.start.mediaFormat.sampleRate,
      },
      "Media stream STT session started",
    );

    // Eagerly resolve capability so it's cached by the time the first
    // turn completes.
    const eagerCapability = resolveTelephonySttCapability();
    // Nothing awaits this promise until the first turn ends. Attach a
    // no-op handler so an early rejection is not reported as unhandled;
    // handleTurnEnd still observes the rejection when it awaits.
    eagerCapability.catch(() => {});
    this.capabilityPromise = eagerCapability;
  }

  /** Classify an inbound audio chunk and append it to the current turn. */
  private handleMedia(event: MediaStreamMediaEvent): void {
    // Only process inbound (caller) audio
    if (event.media.track !== "inbound") return;

    // Compute speech activity from the audio payload using a lightweight
    // energy heuristic. mu-law encoded audio has a companded dynamic
    // range — silence sits near 0xFF/0x7F while speech has higher energy.
    //
    // The detector call runs BEFORE the push so that the onTurnStart
    // callback can clear stale inter-turn silence from the buffer
    // without also wiping the first speech chunk of the new turn.
    const hasSpeech = detectSpeechActivity(event.media.payload);
    this.turnDetector.onMediaChunk(hasSpeech);

    this.currentTurnChunks.push(event.media.payload);
  }

  /** Finalize any active turn, then notify the consumer of the stop. */
  private handleStop(): void {
    // Finalize any in-flight turn
    this.turnDetector.forceEnd();
    this.callbacks.onStop?.();
  }

  // ── Turn completion ────────────────────────────────────────────────

  /**
   * Transcribe the audio buffered for the turn that just ended.
   *
   * Reports the transcript through `onTranscriptFinal`, or a failure
   * through `onError`. Nothing is reported once the session is disposed.
   *
   * @param _reason - Why the detector ended the turn (unused).
   * @param durationMs - Turn duration, forwarded to `onTranscriptFinal`.
   */
  private async handleTurnEnd(
    _reason: "silence" | "max-duration",
    durationMs: number,
  ): Promise<void> {
    // Take ownership of the buffered chunks so audio arriving while we
    // await below belongs to the next turn.
    const chunks = this.currentTurnChunks;
    this.currentTurnChunks = [];

    if (chunks.length === 0) {
      // Silence turn — no audio to transcribe.
      this.callbacks.onTranscriptFinal?.("", durationMs);
      return;
    }

    // Resolve telephony capability (cached after first call)
    if (!this.capabilityPromise) {
      this.capabilityPromise = resolveTelephonySttCapability();
    }
    const capabilityPromise = this.capabilityPromise;

    let capability: TelephonySttCapability;
    try {
      capability = await capabilityPromise;
    } catch (err) {
      // Forget the failed lookup so the next turn retries instead of
      // re-awaiting a permanently rejected promise.
      if (this.capabilityPromise === capabilityPromise) {
        this.capabilityPromise = null;
      }
      if (this.disposed) return;
      const normalized = normalizeSttError(err);
      this.callbacks.onError?.(normalized.category, normalized.message);
      return;
    }
    if (this.disposed) return;

    if (capability.status !== "supported") {
      const reason =
        capability.status === "unsupported"
          ? capability.reason
          : capability.status === "unconfigured"
            ? capability.reason
            : capability.status === "missing-credentials"
              ? capability.reason
              : "Unknown STT capability status";

      this.callbacks.onError?.(capability.status, reason);
      return;
    }

    // Decode the base64 audio chunks into a single buffer.
    const rawAudio = this.decodeAudioChunks(chunks);

    // Wrap raw mu-law PCM in a WAV container so downstream transcribers
    // (e.g. Whisper) receive a recognised audio format with correct headers.
    const isMulaw = this.encoding === "audio/x-mulaw";
    const audioBuffer = isMulaw ? wrapMulawWav(rawAudio) : rawAudio;
    const mimeType = isMulaw ? "audio/wav" : "audio/raw";

    // Resolve a batch transcriber for the configured provider.
    let transcriber;
    try {
      transcriber = await resolveBatchTranscriber();
    } catch (err) {
      if (this.disposed) return;
      const normalized = normalizeSttError(err);
      this.callbacks.onError?.(normalized.category, normalized.message);
      return;
    }
    if (this.disposed) return;

    if (!transcriber) {
      this.callbacks.onError?.(
        "unconfigured",
        "No batch transcriber available for the configured STT provider",
      );
      return;
    }

    // Transcribe with a timeout. The controller is registered with the
    // session so dispose() can cancel this request even when a later
    // turn has started its own transcription in the meantime.
    const controller = new AbortController();
    this.inFlightTranscriptionAborts.add(controller);
    const timeoutId = setTimeout(
      () => controller.abort(),
      this.transcriptionTimeoutMs,
    );

    try {
      const result = await transcriber.transcribe({
        audio: audioBuffer,
        mimeType,
        signal: controller.signal,
        callContext: this.config.callContextHints,
      });

      if (this.disposed) return;
      this.callbacks.onTranscriptFinal?.(result.text, durationMs);
    } catch (err) {
      if (this.disposed) return;
      const normalized = normalizeSttError(err);
      this.callbacks.onError?.(normalized.category, normalized.message);
    } finally {
      clearTimeout(timeoutId);
      this.inFlightTranscriptionAborts.delete(controller);
    }
  }

  // ── Helpers ────────────────────────────────────────────────────────

  /**
   * Decode an array of base64-encoded audio chunks into a single Buffer.
   *
   * @param chunks - Base64 payloads in arrival order.
   * @returns The decoded bytes concatenated in the same order.
   */
  private decodeAudioChunks(chunks: string[]): Buffer {
    const buffers = chunks.map((chunk) => Buffer.from(chunk, "base64"));
    return Buffer.concat(buffers);
  }
}
|
|
336
|
+
|
|
337
|
+
// ---------------------------------------------------------------------------
|
|
338
|
+
// Speech activity detection
|
|
339
|
+
// ---------------------------------------------------------------------------
|
|
340
|
+
|
|
341
|
+
/**
 * Lightweight energy-based speech activity detector for mu-law encoded audio.
 *
 * In G.711 mu-law, zero-amplitude samples are encoded as 0xFF (positive
 * zero) and 0x7F (negative zero); louder samples decode to larger linear
 * magnitudes. This function decodes the base64 payload, averages the
 * absolute linear magnitude of every sample (as computed by
 * `mulawToLinearMagnitude`), and reports speech when that mean exceeds a
 * fixed threshold of 200.
 *
 * Note that the metric is a mean absolute magnitude, not an RMS value.
 * The threshold was chosen by the original author for Twilio's 8 kHz,
 * 8-bit mu-law stream.
 *
 * Exported for testing.
 *
 * @param base64Payload - Base64-encoded mu-law audio chunk from Twilio.
 * @returns `true` if the chunk likely contains speech; `false` otherwise,
 *   including when the payload decodes to zero bytes or cannot be decoded.
 */
export function detectSpeechActivity(base64Payload: string): boolean {
  const speechEnergyThreshold = 200;

  let samples: Buffer;
  try {
    samples = Buffer.from(base64Payload, "base64");
  } catch {
    // Undecodable payload: treat as "no speech".
    return false;
  }

  const sampleCount = samples.length;
  if (sampleCount === 0) return false;

  // Sum the absolute linear magnitude of every mu-law sample.
  let magnitudeSum = 0;
  for (const sample of samples) {
    magnitudeSum += mulawToLinearMagnitude(sample);
  }

  return magnitudeSum / sampleCount > speechEnergyThreshold;
}
|
|
379
|
+
|
|
380
|
+
/**
 * Convert a single mu-law byte to its approximate absolute linear magnitude.
 *
 * G.711 mu-law stores every byte bitwise-inverted. After undoing the
 * inversion the layout is:
 *   - Bit 7: sign (ignored here; only the magnitude is needed).
 *   - Bits 6-4: exponent / segment (3 bits).
 *   - Bits 3-0: mantissa (4 bits).
 *
 * The magnitude is `(((mantissa << 1) | 0x21) << exponent) - 0x21`, which
 * ranges from 0 (for 0xFF and 0x7F) up to 8031 (for 0x00 and 0x80).
 *
 * @param mulawByte - One encoded sample (only the low 8 bits are used).
 * @returns The non-negative linear magnitude of the sample.
 */
function mulawToLinearMagnitude(mulawByte: number): number {
  const bias = 0x21;

  // Undo the G.711 bit inversion, keeping only the low byte.
  const inverted = (mulawByte ^ 0xff) & 0xff;

  const segment = (inverted & 0x70) >> 4;
  const step = inverted & 0x0f;

  return (((step << 1) | bias) << segment) - bias;
}
|
|
399
|
+
|
|
400
|
+
// ---------------------------------------------------------------------------
|
|
401
|
+
// WAV helpers
|
|
402
|
+
// ---------------------------------------------------------------------------
|
|
403
|
+
|
|
404
|
+
/**
 * Prefix raw mu-law audio with a minimal 44-byte RIFF/WAVE header.
 *
 * The header always describes 8 kHz, mono, 8-bit audio with WAVE format
 * tag 0x0007 (mu-law) and a 16-byte `fmt ` sub-chunk. All multi-byte
 * fields are little-endian.
 *
 * Header layout (byte offsets):
 *    0 "RIFF"   4 RIFF size (total - 8)   8 "WAVE"
 *   12 "fmt "  16 fmt size (16)          20 format tag   22 channels
 *   24 sample rate   28 byte rate        32 block align  34 bits/sample
 *   36 "data"  40 data size
 *
 * @param pcm - Raw mu-law sample bytes.
 * @returns A new Buffer holding the header followed by `pcm`.
 */
function wrapMulawWav(pcm: Buffer): Buffer {
  const sampleRate = 8000;
  const channelCount = 1;
  const bitsPerSample = 8;
  const mulawFormatTag = 0x0007;
  const headerLength = 44;

  const bytesPerFrame = channelCount * (bitsPerSample / 8);
  const bytesPerSecond = sampleRate * bytesPerFrame;

  const header = Buffer.alloc(headerLength);

  // RIFF descriptor; the size field excludes the first 8 bytes.
  header.write("RIFF", 0);
  header.writeUInt32LE(headerLength + pcm.length - 8, 4);
  header.write("WAVE", 8);

  // "fmt " sub-chunk
  header.write("fmt ", 12);
  header.writeUInt32LE(16, 16);
  header.writeUInt16LE(mulawFormatTag, 20);
  header.writeUInt16LE(channelCount, 22);
  header.writeUInt32LE(sampleRate, 24);
  header.writeUInt32LE(bytesPerSecond, 28);
  header.writeUInt16LE(bytesPerFrame, 32);
  header.writeUInt16LE(bitsPerSample, 34);

  // "data" sub-chunk
  header.write("data", 36);
  header.writeUInt32LE(pcm.length, 40);

  return Buffer.concat([header, pcm]);
}
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Speech-aware turn detector for segmenting inbound audio from a
|
|
3
|
+
* Twilio Media Stream into discrete utterance "turns".
|
|
4
|
+
*
|
|
5
|
+
* The Twilio ConversationRelay protocol performs VAD (voice activity
|
|
6
|
+
* detection) on Twilio's side and delivers fully segmented transcripts
|
|
7
|
+
* via `prompt` messages. The raw media-stream path, however, delivers a
|
|
8
|
+
* continuous stream of audio chunks with no built-in turn boundaries.
|
|
9
|
+
* This module bridges that gap by detecting turns based on speech
|
|
10
|
+
* activity signals derived from the audio content:
|
|
11
|
+
*
|
|
12
|
+
* 1. **Speech-to-silence transition** — when a period of speech is
|
|
13
|
+
* followed by silence frames exceeding `silenceThresholdMs`, the
|
|
14
|
+
* current turn is considered complete.
|
|
15
|
+
* 2. **Max turn duration** — to prevent unbounded accumulation, a turn
|
|
16
|
+
* is forcibly ended when its total duration exceeds `maxTurnDurationMs`.
|
|
17
|
+
*
|
|
18
|
+
* Continuous inbound media frames (which Twilio sends at a steady
|
|
19
|
+
* cadence regardless of speech) do not prevent turn boundaries. Only
|
|
20
|
+
* frames classified as containing speech reset the silence timer. Turns
|
|
21
|
+
* that never contain a speech-bearing chunk are silently discarded
|
|
22
|
+
* without firing the `onTurnEnd` callback.
|
|
23
|
+
*
|
|
24
|
+
* Design:
|
|
25
|
+
* - Stateful but single-threaded (no locking; runs on the main event loop).
|
|
26
|
+
* - Timer-based silence detection via `setTimeout` / `clearTimeout`.
|
|
27
|
+
* - Integration-neutral: emits callbacks, not wired to any specific
|
|
28
|
+
* downstream consumer.
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
// Configuration
|
|
33
|
+
// ---------------------------------------------------------------------------
|
|
34
|
+
|
|
35
|
+
/**
 * Tunable thresholds for {@link MediaTurnDetector}. Both fields are
 * optional; omitted fields fall back to the module defaults.
 */
export interface TurnDetectorConfig {
  /**
   * Duration of silence (no speech-active chunks) after which the current
   * turn is considered complete. Milliseconds. Default: 800.
   */
  silenceThresholdMs?: number;

  /**
   * Maximum duration of a single turn before it is forcibly ended.
   * The cap is armed when the turn starts, i.e. on its first
   * speech-bearing chunk. Milliseconds. Default: 30_000 (30 seconds).
   */
  maxTurnDurationMs?: number;
}
|
|
48
|
+
|
|
49
|
+
const DEFAULT_SILENCE_THRESHOLD_MS = 800;
|
|
50
|
+
const DEFAULT_MAX_TURN_DURATION_MS = 30_000;
|
|
51
|
+
|
|
52
|
+
// ---------------------------------------------------------------------------
|
|
53
|
+
// Callbacks
|
|
54
|
+
// ---------------------------------------------------------------------------
|
|
55
|
+
|
|
56
|
+
/**
 * Hooks fired by {@link MediaTurnDetector} at turn boundaries. Both are
 * optional.
 */
export interface TurnDetectorCallbacks {
  /**
   * Called when the detector transitions from idle to active (first
   * speech-bearing chunk of a new turn). Useful for signalling
   * "speech started" upstream. It is invoked synchronously from
   * `onMediaChunk`, before the turn's timers are armed.
   */
  onTurnStart?: () => void;

  /**
   * Called when the current turn ends (silence timeout or max duration).
   *
   * @param reason - `"silence"` when the silence timer expired, or
   *   `"max-duration"` when the turn hit the hard cap. `forceEnd()` also
   *   ends the turn with reason `"silence"`.
   * @param durationMs - Approximate wall-clock duration of the turn in
   *   milliseconds (from the first speech chunk to the end trigger).
   */
  onTurnEnd?: (reason: "silence" | "max-duration", durationMs: number) => void;
}
|
|
74
|
+
|
|
75
|
+
// ---------------------------------------------------------------------------
|
|
76
|
+
// Turn detector
|
|
77
|
+
// ---------------------------------------------------------------------------
|
|
78
|
+
|
|
79
|
+
export class MediaTurnDetector {
|
|
80
|
+
private readonly silenceThresholdMs: number;
|
|
81
|
+
private readonly maxTurnDurationMs: number;
|
|
82
|
+
private readonly callbacks: TurnDetectorCallbacks;
|
|
83
|
+
|
|
84
|
+
/** Whether a turn is currently in progress. */
|
|
85
|
+
private active = false;
|
|
86
|
+
|
|
87
|
+
/** Whether any speech-bearing chunk was received during the current turn. */
|
|
88
|
+
private hasSpeechInTurn = false;
|
|
89
|
+
|
|
90
|
+
/** Wall-clock timestamp of the first speech chunk in the current turn. */
|
|
91
|
+
private turnStartedAt = 0;
|
|
92
|
+
|
|
93
|
+
/** Timer that fires when silence exceeds the threshold. */
|
|
94
|
+
private silenceTimer: ReturnType<typeof setTimeout> | null = null;
|
|
95
|
+
|
|
96
|
+
/** Timer that fires when the turn hits max duration. */
|
|
97
|
+
private maxDurationTimer: ReturnType<typeof setTimeout> | null = null;
|
|
98
|
+
|
|
99
|
+
/** Whether the detector has been disposed. */
|
|
100
|
+
private disposed = false;
|
|
101
|
+
|
|
102
|
+
constructor(
|
|
103
|
+
config: TurnDetectorConfig = {},
|
|
104
|
+
callbacks: TurnDetectorCallbacks = {},
|
|
105
|
+
) {
|
|
106
|
+
this.silenceThresholdMs =
|
|
107
|
+
config.silenceThresholdMs ?? DEFAULT_SILENCE_THRESHOLD_MS;
|
|
108
|
+
this.maxTurnDurationMs =
|
|
109
|
+
config.maxTurnDurationMs ?? DEFAULT_MAX_TURN_DURATION_MS;
|
|
110
|
+
this.callbacks = callbacks;
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
/**
|
|
114
|
+
* Whether a turn is currently in progress (speech has been detected and
|
|
115
|
+
* neither the silence timer nor the max-duration timer has fired yet).
|
|
116
|
+
*/
|
|
117
|
+
get isActive(): boolean {
|
|
118
|
+
return this.active;
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
/**
 * Feed an inbound audio chunk to the detector with speech activity info.
 *
 * Call this for every `media` event received from the Twilio Media
 * Stream. The `hasSpeech` flag indicates whether the chunk contains
 * voice activity (computed by the caller from audio energy analysis).
 *
 * When `hasSpeech` is true:
 *   - If idle, starts a new turn: arms the max-duration and silence
 *     timers, then fires `onTurnStart`.
 *   - If a turn is already active, restarts the silence timer.
 *
 * When `hasSpeech` is false:
 *   - If a turn is active and no silence timer is pending, starts the
 *     silence countdown. A countdown that is already running is left
 *     alone, so continuous silent frames do not postpone the turn boundary.
 *   - If idle, the chunk is ignored (no turn is started for silence).
 *
 * No-op once the detector has been disposed.
 *
 * @param hasSpeech - Whether the audio chunk contains detectable speech.
 *   Defaults to `true` for backwards compatibility with callers that
 *   do not perform energy analysis.
 */
onMediaChunk(hasSpeech = true): void {
  if (this.disposed) return;

  if (!hasSpeech) {
    // Speech -> silence transition inside an active turn: begin the
    // countdown, but never restart one that is already ticking.
    if (this.active && this.silenceTimer === null) {
      this.resetSilenceTimer();
    }
    // Silent chunks while idle are ignored — no turn is started.
    return;
  }

  if (this.active) {
    // Ongoing speech pushes the silence deadline back.
    this.resetSilenceTimer();
    return;
  }

  // Transition from idle -> active: start a new turn.
  this.active = true;
  this.hasSpeechInTurn = true;
  this.turnStartedAt = Date.now();

  // Arm both timers BEFORE notifying the listener. If `onTurnStart`
  // re-entrantly calls `forceEnd()` or `dispose()`, their `clearTimers()`
  // must see these handles; arming afterwards would leave orphaned timers
  // (a stale max-duration timer could cut a later turn short). It also
  // keeps the hard cap in place if the callback throws.
  this.maxDurationTimer = setTimeout(() => {
    this.endTurn("max-duration");
  }, this.maxTurnDurationMs);
  this.resetSilenceTimer();

  this.callbacks.onTurnStart?.();
}
|
|
168
|
+
|
|
169
|
+
/**
 * End the in-progress turn right now. Does nothing when no turn is active
 * or the detector has been disposed.
 *
 * Intended for stream shutdown (e.g. a `stop` event) so that an in-flight
 * turn is finalized instead of being left open. The turn is reported to
 * `onTurnEnd` with reason `"silence"`.
 */
forceEnd(): void {
  const canEnd = this.active && !this.disposed;
  if (canEnd) {
    this.endTurn("silence");
  }
}
|
|
179
|
+
|
|
180
|
+
/**
 * Tear the detector down: mark it disposed, cancel any pending timers and
 * drop out of the active state. Afterwards `onMediaChunk` and `forceEnd`
 * are no-ops. No `onTurnEnd` callback is emitted for a turn that was still
 * open.
 */
dispose(): void {
  this.disposed = true;
  this.active = false;
  this.clearTimers();
}
|
|
189
|
+
|
|
190
|
+
// ── Internals ──────────────────────────────────────────────────────
|
|
191
|
+
|
|
192
|
+
/**
 * (Re)start the silence countdown. Any pending countdown is cancelled and
 * a fresh one is scheduled for `silenceThresholdMs` from now.
 */
private resetSilenceTimer(): void {
  const pending = this.silenceTimer;
  if (pending !== null) {
    clearTimeout(pending);
  }

  const onSilenceElapsed = (): void => {
    if (!this.hasSpeechInTurn) {
      // No speech was detected during the turn — reset state without
      // emitting a turn-end callback to avoid bogus empty turns.
      this.clearTimers();
      this.active = false;
      return;
    }
    this.endTurn("silence");
  };

  this.silenceTimer = setTimeout(onSilenceElapsed, this.silenceThresholdMs);
}
|
|
207
|
+
|
|
208
|
+
/**
 * Close the active turn and notify the listener. No-op when idle.
 *
 * State is fully reset (timers cancelled, flags cleared) before
 * `onTurnEnd` runs, so the callback observes an idle detector.
 *
 * @param reason - Why the turn ended.
 */
private endTurn(reason: "silence" | "max-duration"): void {
  if (this.active) {
    // Measure before touching any state.
    const elapsedMs = Date.now() - this.turnStartedAt;

    this.clearTimers();
    this.active = false;
    this.hasSpeechInTurn = false;

    this.callbacks.onTurnEnd?.(reason, elapsedMs);
  }
}
|
|
219
|
+
|
|
220
|
+
/**
 * Cancel the silence and max-duration timers if they are pending, and
 * reset both handles to `null`.
 */
private clearTimers(): void {
  const { silenceTimer, maxDurationTimer } = this;

  if (silenceTimer !== null) {
    clearTimeout(silenceTimer);
    this.silenceTimer = null;
  }

  if (maxDurationTimer !== null) {
    clearTimeout(maxDurationTimer);
    this.maxDurationTimer = null;
  }
}
|
|
230
|
+
}
|