@vellumai/assistant 0.6.3 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (667)
  1. package/ARCHITECTURE.md +273 -10
  2. package/Dockerfile +2 -3
  3. package/bun.lock +5 -13
  4. package/docs/backup-troubleshooting.md +52 -0
  5. package/docs/browser-use-architecture-phase2.md +174 -0
  6. package/docs/stt-provider-onboarding.md +120 -0
  7. package/knip.json +12 -2
  8. package/node_modules/@vellumai/ces-contracts/bun.lock +8 -6
  9. package/node_modules/@vellumai/ces-contracts/package.json +3 -3
  10. package/openapi.yaml +982 -72
  11. package/package.json +4 -6
  12. package/scripts/generate-openapi.ts +0 -1
  13. package/scripts/test.sh +73 -18
  14. package/src/__tests__/agent-image-optimize.test.ts +28 -0
  15. package/src/__tests__/agent-loop.test.ts +123 -0
  16. package/src/__tests__/anthropic-provider.test.ts +263 -10
  17. package/src/__tests__/auto-analysis-end-to-end.test.ts +550 -0
  18. package/src/__tests__/auto-analysis-prompt.test.ts +50 -0
  19. package/src/__tests__/browser-fill-credential.test.ts +11 -0
  20. package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +2 -2
  21. package/src/__tests__/browser-skill-endstate.test.ts +31 -7
  22. package/src/__tests__/btw-routes.test.ts +7 -0
  23. package/src/__tests__/call-controller.test.ts +581 -20
  24. package/src/__tests__/catalog-files.test.ts +138 -0
  25. package/src/__tests__/channel-invite-transport.test.ts +2 -2
  26. package/src/__tests__/channel-readiness-routes.test.ts +16 -20
  27. package/src/__tests__/channel-readiness-service.test.ts +12 -7
  28. package/src/__tests__/checker.test.ts +157 -10
  29. package/src/__tests__/clawhub-files.test.ts +347 -0
  30. package/src/__tests__/commit-message-enrichment-service.test.ts +36 -19
  31. package/src/__tests__/config-analysis.test.ts +100 -0
  32. package/src/__tests__/config-schema.test.ts +1013 -66
  33. package/src/__tests__/config-watcher-cleanup-throttle.test.ts +339 -0
  34. package/src/__tests__/config-watcher.test.ts +43 -8
  35. package/src/__tests__/contact-store-user-file.test.ts +512 -0
  36. package/src/__tests__/contacts-write.test.ts +197 -0
  37. package/src/__tests__/context-window-manager.test.ts +88 -0
  38. package/src/__tests__/conversation-abort-tool-results.test.ts +2 -0
  39. package/src/__tests__/conversation-agent-loop-overflow.test.ts +1 -0
  40. package/src/__tests__/conversation-agent-loop.test.ts +98 -2
  41. package/src/__tests__/conversation-confirmation-signals.test.ts +135 -0
  42. package/src/__tests__/conversation-error.test.ts +70 -0
  43. package/src/__tests__/conversation-history-web-search.test.ts +11 -4
  44. package/src/__tests__/conversation-init.benchmark.test.ts +6 -1
  45. package/src/__tests__/conversation-launcher-skill-regression.test.ts +51 -0
  46. package/src/__tests__/conversation-list-source.test.ts +145 -0
  47. package/src/__tests__/conversation-pre-run-repair.test.ts +2 -0
  48. package/src/__tests__/conversation-provider-retry-repair.test.ts +2 -0
  49. package/src/__tests__/conversation-queue.test.ts +901 -60
  50. package/src/__tests__/conversation-routes-disk-view.test.ts +270 -0
  51. package/src/__tests__/conversation-runtime-assembly.test.ts +55 -0
  52. package/src/__tests__/conversation-skill-tools.test.ts +7 -4
  53. package/src/__tests__/conversation-slash-commands.test.ts +33 -0
  54. package/src/__tests__/conversation-slash-queue.test.ts +89 -18
  55. package/src/__tests__/conversation-slash-unknown.test.ts +2 -0
  56. package/src/__tests__/conversation-tool-setup-batch-authorized.test.ts +226 -0
  57. package/src/__tests__/conversation-workspace-injection.test.ts +2 -0
  58. package/src/__tests__/conversation-workspace-tool-tracking.test.ts +2 -0
  59. package/src/__tests__/credential-health-service.test.ts +352 -0
  60. package/src/__tests__/credential-security-invariants.test.ts +5 -3
  61. package/src/__tests__/credential-vault-unit.test.ts +379 -3
  62. package/src/__tests__/credentials-cli.test.ts +40 -16
  63. package/src/__tests__/cross-provider-web-search.test.ts +146 -35
  64. package/src/__tests__/deterministic-verification-control-plane.test.ts +10 -1
  65. package/src/__tests__/device-id.test.ts +112 -0
  66. package/src/__tests__/docker-signing-key-bootstrap.test.ts +167 -4
  67. package/src/__tests__/dynamic-skill-workflow-prompt.test.ts +1 -3
  68. package/src/__tests__/email-html-renderer.test.ts +71 -0
  69. package/src/__tests__/email-invite-adapter.test.ts +36 -32
  70. package/src/__tests__/emit-event-signal.test.ts +71 -0
  71. package/src/__tests__/extension-id-sync-guard.test.ts +75 -8
  72. package/src/__tests__/fixtures/mock-chrome-extension.ts +11 -0
  73. package/src/__tests__/gateway-only-enforcement.test.ts +206 -1
  74. package/src/__tests__/gateway-only-guard.test.ts +0 -1
  75. package/src/__tests__/gemini-provider.test.ts +64 -0
  76. package/src/__tests__/get-skill-detail-audit.test.ts +325 -0
  77. package/src/__tests__/gmail-archive-fallback.test.ts +193 -0
  78. package/src/__tests__/gmail-archive-gate.test.ts +246 -0
  79. package/src/__tests__/gmail-preferences.test.ts +117 -0
  80. package/src/__tests__/headless-browser-interactions.test.ts +43 -0
  81. package/src/__tests__/headless-browser-mode.test.ts +614 -0
  82. package/src/__tests__/headless-browser-navigate.test.ts +142 -5
  83. package/src/__tests__/headless-browser-read-tools.test.ts +11 -0
  84. package/src/__tests__/headless-browser-snapshot.test.ts +10 -0
  85. package/src/__tests__/heartbeat-service.test.ts +70 -17
  86. package/src/__tests__/home-state-routes.test.ts +162 -0
  87. package/src/__tests__/host-bash-proxy.test.ts +0 -5
  88. package/src/__tests__/host-browser-e2e-cloud.test.ts +138 -4
  89. package/src/__tests__/host-browser-e2e-self-hosted.test.ts +4 -4
  90. package/src/__tests__/host-browser-ws-events-e2e.test.ts +103 -0
  91. package/src/__tests__/host-cu-proxy.test.ts +0 -5
  92. package/src/__tests__/identity-intro-cache.test.ts +40 -10
  93. package/src/__tests__/init-feature-flag-overrides.test.ts +38 -112
  94. package/src/__tests__/jobs-store-upsert-debounced.test.ts +141 -0
  95. package/src/__tests__/llm-context-normalization.test.ts +488 -0
  96. package/src/__tests__/llm-context-route-provider.test.ts +86 -5
  97. package/src/__tests__/llm-usage-store.test.ts +363 -0
  98. package/src/__tests__/media-stream-output.test.ts +555 -0
  99. package/src/__tests__/media-stream-parser.test.ts +374 -0
  100. package/src/__tests__/media-stream-server-integration.test.ts +1234 -0
  101. package/src/__tests__/media-stream-stt-session.test.ts +588 -0
  102. package/src/__tests__/media-turn-detector.test.ts +440 -0
  103. package/src/__tests__/message-queue.test.ts +125 -0
  104. package/src/__tests__/migration-export-http.test.ts +6 -6
  105. package/src/__tests__/migration-import-commit-http.test.ts +8 -6
  106. package/src/__tests__/migration-import-preflight-http.test.ts +6 -5
  107. package/src/__tests__/migration-validate-http.test.ts +3 -3
  108. package/src/__tests__/mock-gateway-ipc.ts +151 -0
  109. package/src/__tests__/model-intents.test.ts +2 -2
  110. package/src/__tests__/oauth-apps-routes.test.ts +1 -0
  111. package/src/__tests__/oauth-cli.test.ts +2 -0
  112. package/src/__tests__/oauth-connect-orchestrator.test.ts +2 -0
  113. package/src/__tests__/oauth-provider-serializer.test.ts +1 -0
  114. package/src/__tests__/oauth-providers-routes.test.ts +2 -0
  115. package/src/__tests__/oauth-store.test.ts +85 -0
  116. package/src/__tests__/oauth2-gateway-transport.test.ts +249 -6
  117. package/src/__tests__/onboarding-template-contract.test.ts +6 -13
  118. package/src/__tests__/openai-provider.test.ts +176 -0
  119. package/src/__tests__/openai-responses-cutover-guard.test.ts +184 -0
  120. package/src/__tests__/openai-responses-provider.test.ts +1105 -0
  121. package/src/__tests__/openrouter-token-estimation.test.ts +100 -0
  122. package/src/__tests__/outlook-unsubscribe.test.ts +31 -2
  123. package/src/__tests__/persona-resolver.test.ts +251 -0
  124. package/src/__tests__/platform-bash-auto-approve.test.ts +4 -0
  125. package/src/__tests__/platform.test.ts +92 -1
  126. package/src/__tests__/post-turn-tool-result-truncation.test.ts +47 -0
  127. package/src/__tests__/prechat-onboarding-contract.test.ts +267 -0
  128. package/src/__tests__/pricing.test.ts +174 -0
  129. package/src/__tests__/qdrant-manager.test.ts +29 -8
  130. package/src/__tests__/regenerate-fire-and-forget-trace.test.ts +194 -0
  131. package/src/__tests__/relationship-state-contract.test.ts +175 -0
  132. package/src/__tests__/relay-server.test.ts +423 -5
  133. package/src/__tests__/search-skills-unified.test.ts +118 -0
  134. package/src/__tests__/secret-scanner-executor.test.ts +4 -0
  135. package/src/__tests__/secure-keys.test.ts +107 -0
  136. package/src/__tests__/send-endpoint-busy.test.ts +5 -1
  137. package/src/__tests__/sequence-store.test.ts +1 -1
  138. package/src/__tests__/server-history-render.test.ts +49 -0
  139. package/src/__tests__/settings-routes.test.ts +201 -0
  140. package/src/__tests__/skill-load-feature-flag.test.ts +1 -0
  141. package/src/__tests__/skills-file-content-endpoint.test.ts +276 -145
  142. package/src/__tests__/skills-files-catalog-fallback.test.ts +381 -93
  143. package/src/__tests__/skills.test.ts +5 -2
  144. package/src/__tests__/skillssh-files.test.ts +446 -0
  145. package/src/__tests__/slack-block-formatting.test.ts +110 -0
  146. package/src/__tests__/slack-channel-config.test.ts +564 -1
  147. package/src/__tests__/stt-catalog-parity.test.ts +282 -0
  148. package/src/__tests__/stt-stream-session.test.ts +535 -0
  149. package/src/__tests__/system-prompt.test.ts +112 -26
  150. package/src/__tests__/telephony-stt-routing.test.ts +329 -0
  151. package/src/__tests__/terminal-tools.test.ts +18 -7
  152. package/src/__tests__/test-preload.ts +18 -0
  153. package/src/__tests__/test-support/browser-skill-harness.ts +4 -1
  154. package/src/__tests__/tool-executor-lifecycle-events.test.ts +9 -5
  155. package/src/__tests__/tool-executor-shell-integration.test.ts +4 -0
  156. package/src/__tests__/tool-executor.test.ts +33 -24
  157. package/src/__tests__/tool-result-truncation.test.ts +36 -0
  158. package/src/__tests__/trust-store.test.ts +7 -1
  159. package/src/__tests__/trusted-contact-approval-notifier.test.ts +1 -1
  160. package/src/__tests__/tts-catalog-parity.test.ts +345 -0
  161. package/src/__tests__/twilio-routes-twiml.test.ts +512 -114
  162. package/src/__tests__/twilio-routes.test.ts +376 -0
  163. package/src/__tests__/unicode.test.ts +293 -0
  164. package/src/__tests__/update-bulletin-format.test.ts +59 -0
  165. package/src/__tests__/update-bulletin.test.ts +206 -5
  166. package/src/__tests__/usage-routes.test.ts +25 -4
  167. package/src/__tests__/user-reference.test.ts +46 -61
  168. package/src/__tests__/verification-control-plane-policy.test.ts +4 -0
  169. package/src/__tests__/voice-config-update.test.ts +403 -0
  170. package/src/__tests__/voice-quality.test.ts +434 -19
  171. package/src/__tests__/workspace-heartbeat-service.test.ts +7 -0
  172. package/src/__tests__/workspace-migration-033-stt-service-explicit-config.test.ts +547 -0
  173. package/src/__tests__/workspace-migration-034-remove-calls-voice-transcription-provider.test.ts +596 -0
  174. package/src/__tests__/workspace-migration-drop-user-md.test.ts +368 -0
  175. package/src/__tests__/workspace-migration-meets.test.ts +244 -0
  176. package/src/__tests__/workspace-migration-seed-device-id.test.ts +14 -20
  177. package/src/__tests__/workspace-policy.test.ts +2 -0
  178. package/src/agent/image-optimize.ts +24 -12
  179. package/src/agent/loop.ts +43 -3
  180. package/src/backup/__tests__/backup-key.test.ts +152 -0
  181. package/src/backup/__tests__/backup-worker.test.ts +767 -0
  182. package/src/backup/__tests__/list-snapshots.test.ts +87 -0
  183. package/src/backup/__tests__/local-writer.test.ts +218 -0
  184. package/src/backup/__tests__/offsite-writer.test.ts +641 -0
  185. package/src/backup/__tests__/paths.test.ts +300 -0
  186. package/src/backup/__tests__/restore.test.ts +498 -0
  187. package/src/backup/__tests__/snapshot-lock.test.ts +352 -0
  188. package/src/backup/__tests__/stream-crypt.test.ts +228 -0
  189. package/src/backup/backup-key.ts +137 -0
  190. package/src/backup/backup-worker.ts +459 -0
  191. package/src/backup/list-snapshots.ts +147 -0
  192. package/src/backup/local-writer.ts +133 -0
  193. package/src/backup/offsite-writer.ts +222 -0
  194. package/src/backup/paths.ts +226 -0
  195. package/src/backup/restore.ts +322 -0
  196. package/src/backup/snapshot-lock.ts +431 -0
  197. package/src/backup/stream-crypt.ts +263 -0
  198. package/src/bundler/package-resolver.ts +4 -0
  199. package/src/calls/audio-store.ts +11 -5
  200. package/src/calls/call-controller.ts +226 -71
  201. package/src/calls/call-domain.ts +9 -0
  202. package/src/calls/call-speech-output.ts +190 -0
  203. package/src/calls/call-transport.ts +77 -0
  204. package/src/calls/media-stream-audio-transcode.ts +173 -0
  205. package/src/calls/media-stream-output.ts +660 -0
  206. package/src/calls/media-stream-parser.ts +300 -0
  207. package/src/calls/media-stream-protocol.ts +166 -0
  208. package/src/calls/media-stream-server.ts +592 -0
  209. package/src/calls/media-stream-stt-session.ts +460 -0
  210. package/src/calls/media-turn-detector.ts +230 -0
  211. package/src/calls/relay-server.ts +90 -75
  212. package/src/calls/resolve-call-tts-provider.ts +136 -0
  213. package/src/calls/telephony-stt-routing.ts +145 -0
  214. package/src/calls/tts-call-strategy.ts +161 -0
  215. package/src/calls/tts-text-sanitizer.ts +32 -16
  216. package/src/calls/twilio-routes.ts +281 -17
  217. package/src/calls/voice-quality.ts +78 -35
  218. package/src/calls/voice-session-bridge.ts +8 -1
  219. package/src/channels/types.ts +16 -0
  220. package/src/cli/__tests__/run-assistant-command.ts +11 -1
  221. package/src/cli/commands/__tests__/backup.test.ts +1165 -0
  222. package/src/cli/commands/__tests__/domain-register.test.ts +234 -0
  223. package/src/cli/commands/__tests__/domain-status.test.ts +132 -0
  224. package/src/cli/commands/__tests__/email-attachment.test.ts +422 -0
  225. package/src/cli/commands/__tests__/email-download.test.ts +16 -1
  226. package/src/cli/commands/__tests__/email-list.test.ts +22 -4
  227. package/src/cli/commands/__tests__/email-register.test.ts +4 -4
  228. package/src/cli/commands/__tests__/email-send.test.ts +37 -4
  229. package/src/cli/commands/__tests__/email-status.test.ts +5 -1
  230. package/src/cli/commands/__tests__/email-unregister.test.ts +34 -5
  231. package/src/cli/commands/backup.ts +993 -0
  232. package/src/cli/commands/conversations.ts +77 -0
  233. package/src/cli/commands/credentials.ts +0 -1
  234. package/src/cli/commands/domain.ts +210 -0
  235. package/src/cli/commands/email.ts +255 -3
  236. package/src/cli/commands/oauth/__tests__/connect.test.ts +12 -0
  237. package/src/cli/commands/oauth/__tests__/providers-delete.test.ts +1 -0
  238. package/src/cli/commands/oauth/__tests__/providers-register.test.ts +1 -0
  239. package/src/cli/commands/oauth/__tests__/providers-update.test.ts +1 -0
  240. package/src/cli/commands/oauth/mode.ts +12 -3
  241. package/src/cli/commands/oauth/providers.ts +15 -0
  242. package/src/cli/commands/oauth/shared.ts +2 -1
  243. package/src/cli/commands/platform/__tests__/callback-routes-list.test.ts +4 -9
  244. package/src/cli/commands/platform/__tests__/connect.test.ts +6 -0
  245. package/src/cli/commands/platform/__tests__/disconnect.test.ts +7 -1
  246. package/src/cli/commands/platform/__tests__/status.test.ts +6 -0
  247. package/src/cli/program.ts +30 -4
  248. package/src/config/__tests__/backup-schema.test.ts +134 -0
  249. package/src/config/assistant-feature-flags.ts +61 -62
  250. package/src/config/bundled-skills/app-builder/references/CUSTOM_ROUTES.md +37 -1
  251. package/src/config/bundled-skills/browser/SKILL.md +30 -5
  252. package/src/config/bundled-skills/browser/TOOLS.json +123 -0
  253. package/src/config/bundled-skills/browser/tools/browser-attach.ts +12 -0
  254. package/src/config/bundled-skills/browser/tools/browser-detach.ts +12 -0
  255. package/src/config/bundled-skills/browser/tools/browser-status.ts +12 -0
  256. package/src/config/bundled-skills/browser/tools/browser-wait-for-download.ts +17 -0
  257. package/src/config/bundled-skills/contacts/SKILL.md +2 -2
  258. package/src/config/bundled-skills/gmail/SKILL.md +53 -7
  259. package/src/config/bundled-skills/gmail/TOOLS.json +33 -3
  260. package/src/config/bundled-skills/gmail/tools/gmail-archive.ts +116 -9
  261. package/src/config/bundled-skills/gmail/tools/gmail-outreach-scan.ts +138 -11
  262. package/src/config/bundled-skills/gmail/tools/gmail-preferences-tool.ts +59 -0
  263. package/src/config/bundled-skills/gmail/tools/gmail-preferences.ts +82 -0
  264. package/src/config/bundled-skills/gmail/tools/gmail-sender-digest.ts +113 -17
  265. package/src/config/bundled-skills/gmail/tools/gmail-unsubscribe.ts +2 -2
  266. package/src/config/bundled-skills/media-processing/SKILL.md +3 -9
  267. package/src/config/bundled-skills/media-processing/TOOLS.json +1 -6
  268. package/src/config/bundled-skills/media-processing/__tests__/audio-transcribe.test.ts +125 -0
  269. package/src/config/bundled-skills/media-processing/__tests__/extract-keyframes.test.ts +181 -0
  270. package/src/config/bundled-skills/media-processing/__tests__/preprocess-audio.test.ts +141 -0
  271. package/src/config/bundled-skills/media-processing/services/audio-transcribe.ts +32 -87
  272. package/src/config/bundled-skills/media-processing/services/preprocess.ts +8 -4
  273. package/src/config/bundled-skills/media-processing/tools/extract-keyframes.ts +0 -10
  274. package/src/config/bundled-skills/messaging/SKILL.md +3 -3
  275. package/src/config/bundled-skills/messaging/tools/messaging-archive-by-sender.ts +2 -2
  276. package/src/config/bundled-skills/outlook/SKILL.md +2 -2
  277. package/src/config/bundled-skills/outlook/tools/outlook-unsubscribe.ts +2 -2
  278. package/src/config/bundled-skills/phone-calls/SKILL.md +2 -2
  279. package/src/config/bundled-skills/phone-calls/references/CONFIG.md +27 -18
  280. package/src/config/bundled-skills/phone-calls/references/TROUBLESHOOTING.md +3 -3
  281. package/src/config/bundled-skills/settings/TOOLS.json +3 -3
  282. package/src/config/bundled-skills/settings/tools/voice-config-update.ts +26 -22
  283. package/src/config/bundled-skills/slack/SKILL.md +1 -0
  284. package/src/config/bundled-skills/transcribe/SKILL.md +9 -14
  285. package/src/config/bundled-skills/transcribe/TOOLS.json +2 -7
  286. package/src/config/bundled-skills/transcribe/tools/transcribe-media.test.ts +256 -0
  287. package/src/config/bundled-skills/transcribe/tools/transcribe-media.ts +38 -188
  288. package/src/config/bundled-tool-registry.ts +8 -0
  289. package/src/config/env-registry.ts +24 -0
  290. package/src/config/env.ts +34 -10
  291. package/src/config/feature-flag-registry.json +46 -14
  292. package/src/config/loader.ts +26 -12
  293. package/src/config/schema.ts +35 -10
  294. package/src/config/schemas/__tests__/stt.test.ts +43 -0
  295. package/src/config/schemas/analysis.ts +51 -0
  296. package/src/config/schemas/backup.ts +72 -0
  297. package/src/config/schemas/calls.ts +1 -26
  298. package/src/config/schemas/elevenlabs.ts +0 -59
  299. package/src/config/schemas/filing.ts +47 -7
  300. package/src/config/schemas/heartbeat.ts +27 -5
  301. package/src/config/schemas/host-browser.ts +47 -1
  302. package/src/config/schemas/inference.ts +1 -1
  303. package/src/config/schemas/memory-lifecycle.ts +14 -2
  304. package/src/config/schemas/services.ts +44 -0
  305. package/src/config/schemas/stt.ts +59 -0
  306. package/src/config/schemas/tts.ts +230 -0
  307. package/src/config/schemas/updates.ts +14 -0
  308. package/src/config/skills.ts +4 -0
  309. package/src/config/types.ts +4 -0
  310. package/src/contacts/contact-store.ts +56 -11
  311. package/src/contacts/contacts-write.ts +38 -1
  312. package/src/context/post-turn-tool-result-truncation.ts +3 -2
  313. package/src/context/tool-result-truncation.ts +2 -1
  314. package/src/context/window-manager.ts +45 -12
  315. package/src/credential-execution/executable-discovery.ts +12 -2
  316. package/src/credential-execution/process-manager.ts +33 -2
  317. package/src/credential-health/credential-health-service.ts +366 -0
  318. package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +324 -0
  319. package/src/daemon/__tests__/conversation-surfaces-launch.test.ts +497 -0
  320. package/src/daemon/__tests__/conversation-tool-setup.test.ts +17 -8
  321. package/src/daemon/__tests__/lifecycle-startup-ordering.test.ts +127 -0
  322. package/src/daemon/config-watcher.ts +99 -5
  323. package/src/daemon/conversation-agent-loop-handlers.ts +6 -0
  324. package/src/daemon/conversation-agent-loop.ts +101 -24
  325. package/src/daemon/conversation-error.ts +11 -0
  326. package/src/daemon/conversation-history.ts +40 -6
  327. package/src/daemon/conversation-launch.ts +220 -0
  328. package/src/daemon/conversation-lifecycle.ts +59 -9
  329. package/src/daemon/conversation-messaging.ts +37 -3
  330. package/src/daemon/conversation-notifiers.ts +5 -0
  331. package/src/daemon/conversation-process.ts +581 -19
  332. package/src/daemon/conversation-queue-manager.ts +24 -0
  333. package/src/daemon/conversation-runtime-assembly.ts +11 -1
  334. package/src/daemon/conversation-slash.ts +36 -0
  335. package/src/daemon/conversation-surfaces.ts +94 -4
  336. package/src/daemon/conversation-tool-setup.ts +25 -0
  337. package/src/daemon/conversation-usage.ts +7 -4
  338. package/src/daemon/conversation.ts +86 -28
  339. package/src/daemon/handlers/config-slack-channel.ts +269 -94
  340. package/src/daemon/handlers/conversations.ts +4 -1
  341. package/src/daemon/handlers/shared.ts +22 -0
  342. package/src/daemon/handlers/skills.ts +321 -77
  343. package/src/daemon/host-browser-proxy.ts +2 -1
  344. package/src/daemon/lifecycle.ts +122 -25
  345. package/src/daemon/message-protocol.ts +6 -0
  346. package/src/daemon/message-types/conversations.ts +34 -1
  347. package/src/daemon/message-types/home.ts +40 -0
  348. package/src/daemon/message-types/meet.ts +143 -0
  349. package/src/daemon/message-types/messages.ts +14 -0
  350. package/src/daemon/message-types/schedules.ts +34 -2
  351. package/src/daemon/message-types/skills.ts +16 -0
  352. package/src/daemon/message-types/surfaces.ts +2 -0
  353. package/src/daemon/server.ts +347 -2
  354. package/src/daemon/shutdown-handlers.ts +32 -4
  355. package/src/daemon/shutdown-registry.ts +40 -0
  356. package/src/daemon/tool-side-effects.ts +9 -0
  357. package/src/email/html-renderer.ts +76 -0
  358. package/src/heartbeat/heartbeat-service.ts +93 -7
  359. package/src/home/__tests__/assistant-feed-authoring.test.ts +156 -0
  360. package/src/home/__tests__/emit-feed-event.test.ts +169 -0
  361. package/src/home/__tests__/feed-scheduler.test.ts +194 -0
  362. package/src/home/__tests__/feed-types.test.ts +275 -0
  363. package/src/home/__tests__/feed-writer.test.ts +688 -0
  364. package/src/home/__tests__/phase5-exit-criteria.test.ts +212 -0
  365. package/src/home/__tests__/platform-gmail-digest.test.ts +222 -0
  366. package/src/home/__tests__/progress-formula.test.ts +213 -0
  367. package/src/home/__tests__/relationship-state-writer.test.ts +740 -0
  368. package/src/home/__tests__/rollup-producer.test.ts +398 -0
  369. package/src/home/assistant-feed-authoring.ts +124 -0
  370. package/src/home/emit-feed-event.ts +158 -0
  371. package/src/home/feed-scheduler.ts +247 -0
  372. package/src/home/feed-types.ts +181 -0
  373. package/src/home/feed-writer.ts +469 -0
  374. package/src/home/platform-gmail-digest.ts +163 -0
  375. package/src/home/progress-formula.ts +86 -0
  376. package/src/home/relationship-state-writer.ts +824 -0
  377. package/src/home/relationship-state.ts +143 -0
  378. package/src/home/rollup-producer.ts +384 -0
  379. package/src/hooks/runner.ts +7 -0
  380. package/src/inbound/platform-callback-registration.ts +12 -3
  381. package/src/inbound/public-ingress-urls.ts +12 -0
  382. package/src/instrument.ts +1 -1
  383. package/src/ipc/__tests__/cli-ipc.test.ts +200 -0
  384. package/src/ipc/cli-client.ts +151 -0
  385. package/src/ipc/cli-server.ts +234 -0
  386. package/src/ipc/gateway-client.ts +180 -0
  387. package/src/ipc/routes/index.ts +5 -0
  388. package/src/ipc/routes/wake-conversation.ts +19 -0
  389. package/src/memory/__tests__/auto-analysis-enqueue.test.ts +356 -0
  390. package/src/memory/__tests__/auto-analysis-guard.test.ts +57 -0
  391. package/src/memory/__tests__/conversation-analyze-job.test.ts +232 -0
  392. package/src/memory/__tests__/find-analysis-conversation.test.ts +196 -0
  393. package/src/memory/app-store.ts +1 -1
  394. package/src/memory/attachments-store.ts +70 -0
  395. package/src/memory/auto-analysis-enqueue.ts +127 -0
  396. package/src/memory/auto-analysis-guard.ts +27 -0
  397. package/src/memory/cleanup-schedule-state.ts +37 -0
  398. package/src/memory/conversation-analyze-job.ts +73 -0
  399. package/src/memory/conversation-crud.ts +99 -0
  400. package/src/memory/conversation-disk-view.ts +7 -0
  401. package/src/memory/conversation-group-migration.ts +34 -2
  402. package/src/memory/conversation-queries.ts +6 -5
  403. package/src/memory/db-init.ts +6 -0
  404. package/src/memory/db-maintenance.ts +108 -0
  405. package/src/memory/db.ts +1 -0
  406. package/src/memory/graph/conversation-graph-memory.ts +15 -0
  407. package/src/memory/graph/extraction.test.ts +23 -0
  408. package/src/memory/graph/extraction.ts +8 -0
  409. package/src/memory/graph/retriever.ts +27 -18
  410. package/src/memory/graph/scoring.test.ts +186 -0
  411. package/src/memory/graph/scoring.ts +31 -1
  412. package/src/memory/graph/tools.ts +1 -1
  413. package/src/memory/group-crud.ts +6 -1
  414. package/src/memory/indexer.ts +95 -16
  415. package/src/memory/job-handlers/cleanup.ts +11 -8
  416. package/src/memory/job-handlers/conversation-starters.ts +16 -10
  417. package/src/memory/jobs-store.ts +64 -4
  418. package/src/memory/jobs-worker.ts +22 -9
  419. package/src/memory/llm-usage-store.ts +92 -56
  420. package/src/memory/migrations/219-oauth-providers-token-exchange-body-format.ts +15 -0
  421. package/src/memory/migrations/220-normalize-user-file-by-principal.ts +190 -0
  422. package/src/memory/migrations/221-conversations-archived-at.ts +16 -0
  423. package/src/memory/migrations/index.ts +6 -0
  424. package/src/memory/migrations/registry.ts +8 -0
  425. package/src/memory/qdrant-manager.ts +43 -16
  426. package/src/memory/schema/conversations.ts +2 -0
  427. package/src/memory/schema/oauth.ts +3 -0
  428. package/src/memory/usage-buckets.ts +396 -0
  429. package/src/messaging/providers/gmail/client.ts +57 -6
  430. package/src/messaging/providers/slack/__tests__/adapter-token-routing.test.ts +282 -0
  431. package/src/messaging/providers/slack/adapter.ts +143 -38
  432. package/src/messaging/providers/slack/client.ts +16 -0
  433. package/src/messaging/providers/slack/types.ts +4 -0
  434. package/src/notifications/decision-engine.ts +3 -3
  435. package/src/notifications/signal.ts +5 -0
  436. package/src/oauth/__tests__/identity-verifier.test.ts +1 -0
  437. package/src/oauth/byo-connection.test.ts +18 -1
  438. package/src/oauth/byo-connection.ts +3 -1
  439. package/src/oauth/connect-orchestrator.ts +2 -0
  440. package/src/oauth/connection-resolver.ts +6 -2
  441. package/src/oauth/connection.ts +2 -0
  442. package/src/oauth/oauth-store.ts +9 -0
  443. package/src/oauth/platform-connection.test.ts +98 -0
  444. package/src/oauth/platform-connection.ts +52 -31
  445. package/src/oauth/seed-providers.ts +7 -0
  446. package/src/permissions/checker.ts +16 -6
  447. package/src/permissions/defaults.ts +49 -1
  448. package/src/permissions/trust-store.ts +3 -3
  449. package/src/permissions/workspace-policy.ts +3 -0
  450. package/src/platform/client.test.ts +10 -0
  451. package/src/platform/sync-identity.ts +129 -0
  452. package/src/prompts/persona-resolver.ts +126 -2
  453. package/src/prompts/system-prompt.ts +59 -18
  454. package/src/prompts/templates/BOOTSTRAP.md +5 -5
  455. package/src/prompts/templates/SOUL.md +3 -1
  456. package/src/prompts/templates/UPDATES.md +12 -0
  457. package/src/prompts/templates/channels/slack.md +20 -0
  458. package/src/prompts/update-bulletin-format.ts +26 -9
  459. package/src/prompts/update-bulletin.ts +34 -23
  460. package/src/prompts/user-reference.ts +20 -17
  461. package/src/providers/__tests__/provider-secret-catalog.test.ts +42 -0
  462. package/src/providers/anthropic/client.ts +157 -61
  463. package/src/providers/fireworks/client.ts +2 -2
  464. package/src/providers/gemini/client.ts +9 -1
  465. package/src/providers/model-catalog.ts +6 -0
  466. package/src/providers/model-intents.ts +4 -4
  467. package/src/providers/ollama/client.ts +2 -2
  468. package/src/providers/openai/chat-completions-provider.ts +474 -0
  469. package/src/providers/openai/client.ts +25 -440
  470. package/src/providers/openai/responses-provider.ts +502 -0
  471. package/src/providers/openrouter/client.ts +101 -4
  472. package/src/providers/provider-secret-catalog.ts +139 -0
  473. package/src/providers/registry.ts +2 -2
  474. package/src/providers/retry.ts +14 -3
  475. package/src/providers/speech-to-text/__tests__/provider-catalog.test.ts +251 -0
  476. package/src/providers/speech-to-text/__tests__/resolve.test.ts +828 -0
  477. package/src/providers/speech-to-text/deepgram-realtime.test.ts +980 -0
  478. package/src/providers/speech-to-text/deepgram-realtime.ts +767 -0
  479. package/src/providers/speech-to-text/deepgram.test.ts +332 -0
  480. package/src/providers/speech-to-text/deepgram.ts +115 -0
  481. package/src/providers/speech-to-text/google-gemini-live-stream.test.ts +743 -0
  482. package/src/providers/speech-to-text/google-gemini-live-stream.ts +625 -0
  483. package/src/providers/speech-to-text/google-gemini.test.ts +226 -0
  484. package/src/providers/speech-to-text/google-gemini.ts +101 -0
  485. package/src/providers/speech-to-text/openai-whisper-stream.test.ts +564 -0
  486. package/src/providers/speech-to-text/openai-whisper-stream.ts +381 -0
  487. package/src/providers/speech-to-text/openai-whisper.test.ts +1 -37
  488. package/src/providers/speech-to-text/openai-whisper.ts +63 -33
  489. package/src/providers/speech-to-text/provider-catalog.ts +306 -0
  490. package/src/providers/speech-to-text/resolve.ts +386 -6
  491. package/src/providers/types.ts +9 -0
  492. package/src/runtime/AGENTS.md +43 -1
  493. package/src/runtime/__tests__/agent-wake.test.ts +831 -0
  494. package/src/runtime/__tests__/runtime-mode.test.ts +62 -0
  495. package/src/runtime/__tests__/slack-block-formatting.test.ts +481 -0
  496. package/src/runtime/agent-wake.ts +512 -0
  497. package/src/runtime/auth/__tests__/route-policy.test.ts +40 -0
  498. package/src/runtime/auth/route-policy.ts +30 -5
  499. package/src/runtime/auth/token-service.ts +56 -1
  500. package/src/runtime/btw-sidechain.ts +2 -0
  501. package/src/runtime/capability-tokens.ts +10 -10
  502. package/src/runtime/channel-invite-transport.ts +1 -1
  503. package/src/runtime/channel-invite-transports/email.ts +14 -6
  504. package/src/runtime/channel-readiness-service.ts +12 -22
  505. package/src/runtime/chrome-extension-registry.ts +38 -2
  506. package/src/runtime/http-server.ts +395 -10
  507. package/src/runtime/http-types.ts +6 -2
  508. package/src/runtime/migrations/__tests__/vbundle-import-credentials.test.ts +36 -0
  509. package/src/runtime/migrations/__tests__/vbundle-legacy-user-md.test.ts +360 -0
  510. package/src/runtime/migrations/migration-transport.ts +1 -0
  511. package/src/runtime/migrations/migration-wizard.ts +1 -0
  512. package/src/runtime/migrations/vbundle-import-analyzer.ts +77 -1
  513. package/src/runtime/migrations/vbundle-importer.ts +34 -0
  514. package/src/runtime/pending-interactions.ts +0 -11
  515. package/src/runtime/routes/__tests__/backup-routes.test.ts +967 -0
  516. package/src/runtime/routes/__tests__/home-feed-routes.test.ts +507 -0
  517. package/src/runtime/routes/__tests__/migration-import-credential-filter.test.ts +208 -0
  518. package/src/runtime/routes/__tests__/stt-routes.test.ts +406 -0
  519. package/src/runtime/routes/__tests__/tts-routes.test.ts +474 -0
  520. package/src/runtime/routes/__tests__/user-route-dispatcher.test.ts +148 -17
  521. package/src/runtime/routes/app-management-routes.ts +12 -18
  522. package/src/runtime/routes/attachment-routes.test.ts +9 -3
  523. package/src/runtime/routes/attachment-routes.ts +216 -17
  524. package/src/runtime/routes/backup-routes.ts +519 -0
  525. package/src/runtime/routes/browser-extension-pair-routes.ts +82 -23
  526. package/src/runtime/routes/btw-routes.ts +8 -6
  527. package/src/runtime/routes/contact-routes.test.ts +298 -0
  528. package/src/runtime/routes/contact-routes.ts +132 -5
  529. package/src/runtime/routes/conversation-analysis-routes.ts +22 -142
  530. package/src/runtime/routes/conversation-management-routes.ts +115 -0
  531. package/src/runtime/routes/conversation-routes.ts +367 -146
  532. package/src/runtime/routes/filing-routes.ts +93 -0
  533. package/src/runtime/routes/home-feed-routes.ts +334 -0
  534. package/src/runtime/routes/home-state-routes.ts +138 -0
  535. package/src/runtime/routes/host-browser-routes.ts +3 -14
  536. package/src/runtime/routes/identity-intro-cache.ts +7 -3
  537. package/src/runtime/routes/identity-routes.ts +3 -17
  538. package/src/runtime/routes/inbound-stages/transcribe-audio.test.ts +46 -39
  539. package/src/runtime/routes/inbound-stages/transcribe-audio.ts +15 -15
  540. package/src/runtime/routes/integrations/slack/__tests__/channel.test.ts +137 -0
  541. package/src/runtime/routes/integrations/slack/__tests__/share.test.ts +179 -0
  542. package/src/runtime/routes/integrations/slack/channel.ts +11 -3
  543. package/src/runtime/routes/integrations/slack/share.ts +45 -7
  544. package/src/runtime/routes/llm-context-normalization.ts +303 -0
  545. package/src/runtime/routes/memory-item-routes.test.ts +3 -2
  546. package/src/runtime/routes/migration-routes.ts +40 -5
  547. package/src/runtime/routes/settings-routes.ts +22 -5
  548. package/src/runtime/routes/skills-routes.ts +76 -7
  549. package/src/runtime/routes/stt-routes.ts +233 -0
  550. package/src/runtime/routes/surface-action-routes.ts +41 -2
  551. package/src/runtime/routes/tts-routes.ts +108 -24
  552. package/src/runtime/routes/usage-routes.ts +30 -2
  553. package/src/runtime/routes/user-route-dispatcher.ts +50 -5
  554. package/src/runtime/routes/user-routes.ts +13 -1
  555. package/src/runtime/routes/work-items-routes.ts +8 -1
  556. package/src/runtime/runtime-mode.ts +33 -0
  557. package/src/runtime/services/__tests__/analyze-conversation.test.ts +444 -0
  558. package/src/runtime/services/__tests__/analyze-deps-singleton.test.ts +67 -0
  559. package/src/runtime/services/__tests__/auto-analysis-prompt.test.ts +53 -0
  560. package/src/runtime/services/__tests__/manual-analysis-prompt.test.ts +41 -0
  561. package/src/runtime/services/analyze-conversation.ts +344 -0
  562. package/src/runtime/services/analyze-deps-singleton.ts +32 -0
  563. package/src/runtime/services/auto-analysis-prompt.ts +55 -0
  564. package/src/runtime/skill-route-registry.ts +49 -0
  565. package/src/runtime/slack-block-formatting.ts +437 -10
  566. package/src/schedule/scheduler.ts +50 -0
  567. package/src/security/oauth2.ts +26 -4
  568. package/src/security/secure-keys.ts +25 -2
  569. package/src/security/token-manager.ts +8 -0
  570. package/src/sequence/engine.ts +23 -0
  571. package/src/sequence/types.ts +1 -1
  572. package/src/skills/catalog-files.ts +64 -2
  573. package/src/skills/category-inference.ts +122 -0
  574. package/src/skills/clawhub-files.ts +213 -0
  575. package/src/skills/clawhub.ts +84 -23
  576. package/src/skills/skill-file-provider.ts +40 -0
  577. package/src/skills/skillssh-files.ts +395 -0
  578. package/src/skills/skillssh-registry.ts +4 -4
  579. package/src/stt/__tests__/daemon-batch-transcriber.test.ts +392 -0
  580. package/src/stt/__tests__/types.test.ts +89 -0
  581. package/src/stt/daemon-batch-transcriber.ts +195 -0
  582. package/src/stt/stt-stream-session.ts +499 -0
  583. package/src/stt/types.ts +330 -0
  584. package/src/stt/wav-encoder.test.ts +373 -0
  585. package/src/stt/wav-encoder.ts +175 -0
  586. package/src/subagent/manager.ts +38 -14
  587. package/src/tools/browser/__tests__/browser-mode.test.ts +119 -0
  588. package/src/tools/browser/__tests__/browser-status.test.ts +123 -0
  589. package/src/tools/browser/browser-execution.ts +1163 -23
  590. package/src/tools/browser/browser-manager.ts +45 -0
  591. package/src/tools/browser/browser-mode-constants.ts +12 -0
  592. package/src/tools/browser/browser-mode.ts +92 -0
  593. package/src/tools/browser/browser-status-constants.ts +33 -0
  594. package/src/tools/browser/cdp-client/__tests__/cdp-inspect-client.test.ts +393 -0
  595. package/src/tools/browser/cdp-client/__tests__/extension-cdp-client.test.ts +29 -0
  596. package/src/tools/browser/cdp-client/__tests__/factory.test.ts +1648 -32
  597. package/src/tools/browser/cdp-client/cdp-inspect/__tests__/discovery.test.ts +264 -0
  598. package/src/tools/browser/cdp-client/cdp-inspect/discovery.ts +183 -17
  599. package/src/tools/browser/cdp-client/cdp-inspect-client.ts +254 -21
  600. package/src/tools/browser/cdp-client/errors.ts +15 -0
  601. package/src/tools/browser/cdp-client/extension-cdp-client.ts +39 -16
  602. package/src/tools/browser/cdp-client/factory.ts +797 -87
  603. package/src/tools/browser/cdp-client/index.ts +16 -2
  604. package/src/tools/browser/cdp-client/types.ts +68 -0
  605. package/src/tools/credentials/vault.ts +35 -6
  606. package/src/tools/network/web-fetch.ts +5 -2
  607. package/src/tools/network/web-search.ts +5 -2
  608. package/src/tools/shared/shell-output.ts +3 -1
  609. package/src/tools/side-effects.ts +2 -0
  610. package/src/tools/skills/sandbox-runner.ts +3 -2
  611. package/src/tools/terminal/safe-env.ts +10 -2
  612. package/src/tools/terminal/shell.ts +15 -4
  613. package/src/tools/tool-manifest.ts +21 -0
  614. package/src/tools/types.ts +17 -0
  615. package/src/tools/ui-surface/definitions.ts +6 -1
  616. package/src/tts/__tests__/provider-adapters.test.ts +834 -0
  617. package/src/tts/__tests__/provider-catalog-consistency.test.ts +196 -0
  618. package/src/tts/__tests__/provider-catalog.test.ts +183 -0
  619. package/src/tts/__tests__/provider-registry.test.ts +90 -0
  620. package/src/tts/provider-catalog.ts +201 -0
  621. package/src/tts/provider-registry.ts +73 -0
  622. package/src/tts/providers/deepgram-provider.ts +219 -0
  623. package/src/tts/providers/elevenlabs-provider.ts +211 -0
  624. package/src/tts/providers/fish-audio-provider.ts +183 -0
  625. package/src/tts/providers/index.ts +42 -0
  626. package/src/tts/providers/register-builtins.ts +130 -0
  627. package/src/tts/synthesize-text.ts +110 -0
  628. package/src/tts/tts-config-resolver.ts +78 -0
  629. package/src/tts/types.ts +153 -0
  630. package/src/types/onboarding-context.ts +7 -0
  631. package/src/util/abort-reasons.ts +58 -0
  632. package/src/util/device-id.ts +32 -16
  633. package/src/util/errors.ts +9 -1
  634. package/src/util/platform.ts +54 -10
  635. package/src/util/pricing.ts +66 -3
  636. package/src/util/spawn.ts +1 -1
  637. package/src/util/truncate.ts +4 -2
  638. package/src/util/unicode.ts +201 -0
  639. package/src/version.ts +19 -24
  640. package/src/watcher/engine.ts +23 -0
  641. package/src/watcher/watcher-store.ts +31 -0
  642. package/src/workspace/migrations/003-seed-device-id.ts +9 -3
  643. package/src/workspace/migrations/017-seed-persona-dirs.ts +68 -4
  644. package/src/workspace/migrations/029-seed-pkb.ts +1 -1
  645. package/src/workspace/migrations/031-drop-user-md.ts +317 -0
  646. package/src/workspace/migrations/031-llm-log-retention-zero-to-null.ts +73 -0
  647. package/src/workspace/migrations/032-tts-provider-unification.ts +227 -0
  648. package/src/workspace/migrations/033-stt-service-explicit-config.ts +122 -0
  649. package/src/workspace/migrations/034-remove-calls-voice-transcription-provider.ts +215 -0
  650. package/src/workspace/migrations/035-seed-slack-channel-persona.ts +50 -0
  651. package/src/workspace/migrations/036-update-pkb-index-bar.ts +37 -0
  652. package/src/workspace/migrations/037-create-meets-dir.ts +61 -0
  653. package/src/workspace/migrations/registry.ts +16 -0
  654. package/src/workspace/top-level-renderer.ts +13 -1
  655. package/src/workspace/turn-commit.ts +31 -0
  656. package/src/__tests__/email-cli.test.ts +0 -297
  657. package/src/__tests__/email-service-config-fallback.test.ts +0 -102
  658. package/src/cli/commands/browser-relay.ts +0 -466
  659. package/src/email/guardrails.ts +0 -221
  660. package/src/email/provider.ts +0 -117
  661. package/src/email/providers/agentmail.ts +0 -361
  662. package/src/email/providers/index.ts +0 -65
  663. package/src/email/service.ts +0 -384
  664. package/src/email/types.ts +0 -126
  665. package/src/prompts/templates/USER.md +0 -13
  666. package/src/providers/speech-to-text/types.ts +0 -17
  667. package/src/runtime/routes/browser-cdp-routes.ts +0 -229
@@ -0,0 +1,767 @@
1
+ /**
2
+ * Deepgram realtime streaming STT adapter.
3
+ *
4
+ * Opens a WebSocket session against Deepgram's live transcription endpoint
5
+ * (`/v1/listen`), forwards PCM audio frames from the caller, and normalizes
6
+ * Deepgram's streaming response payloads (`is_final`, `speech_final`,
7
+ * endpointing metadata) into the daemon's {@link SttStreamServerEvent}
8
+ * contract with stable partial/final semantics.
9
+ *
10
+ * Lifecycle:
11
+ * 1. {@link start} opens the WebSocket and resolves once the connection is
12
+ * established.
13
+ * 2. {@link sendAudio} forwards audio chunks over the open socket with
14
+ * backpressure-safe bufferedAmount checks.
15
+ * 3. {@link stop} sends the Deepgram `CloseStream` message and waits for
16
+ * the provider to flush any remaining finals before closing.
17
+ * 4. The `onEvent` callback receives `partial`, `final`, `error`, and
18
+ * `closed` events throughout the session lifetime.
19
+ *
20
+ * Error handling:
21
+ * - Provider WebSocket errors and unexpected closes are mapped to
22
+ * {@link SttStreamServerErrorEvent} with appropriate categories.
23
+ * - A configurable inactivity timeout fires a `closed` event if the
24
+ * provider stops sending data mid-session.
25
+ * - All timers and listeners are cleaned up on close to prevent leaks.
26
+ */
27
+
28
+ import type {
29
+ StreamingTranscriber,
30
+ SttStreamServerEvent,
31
+ } from "../../stt/types.js";
32
+ import { getLogger } from "../../util/logger.js";
33
+
34
+ const log = getLogger("deepgram-realtime");
35
+
36
+ // ---------------------------------------------------------------------------
37
+ // Constants
38
+ // ---------------------------------------------------------------------------
39
+
40
/** WebSocket origin used when the caller does not supply `baseUrl`. */
const DEFAULT_WS_BASE_URL = "wss://api.deepgram.com";

/** Deepgram model requested when the caller does not pick one. */
const DEFAULT_MODEL = "nova-2";

/**
 * Handshake budget in milliseconds. start() rejects if the socket has not
 * opened by the time this window elapses.
 */
const DEFAULT_CONNECT_TIMEOUT_MS = 10 * 1000;

/**
 * Inactivity window in milliseconds. The timer is armed once the session
 * is open and re-armed on every inbound provider message, so it only
 * elapses when Deepgram goes quiet for this long.
 */
const DEFAULT_INACTIVITY_TIMEOUT_MS = 30 * 1000;

/**
 * Ceiling (bytes) for the socket's `bufferedAmount`. Above this value
 * sendAudio drops the frame instead of queueing it, which keeps memory
 * bounded when the network or provider falls behind the audio rate.
 */
const MAX_BUFFERED_AMOUNT = 2 ** 20; // 1 MiB

/**
 * Time (ms) Deepgram gets to flush remaining finals after CloseStream
 * has been sent; once it elapses the socket is force-closed.
 */
const CLOSE_GRACE_MS = 5 * 1000;
68
+
69
+ // ---------------------------------------------------------------------------
70
+ // Options
71
+ // ---------------------------------------------------------------------------
72
+
73
/**
 * Caller-tunable settings for {@link DeepgramRealtimeTranscriber}.
 *
 * Every field is optional; the defaults quoted below are the ones the
 * transcriber's constructor applies when a field is omitted.
 */
export interface DeepgramRealtimeOptions {
  /** Deepgram model to use (default: "nova-2"). */
  model?: string;
  /** BCP-47 language code (e.g. "en", "es"). No default is applied when omitted. */
  language?: string;
  /** Enable Deepgram smart formatting (punctuation, numerals, etc.). Default: true. */
  smartFormatting?: boolean;
  /** Enable interim (partial) results. Default: true. */
  interimResults?: boolean;
  /**
   * Utterance-end window in milliseconds.
   *
   * This is a duration, not an on/off switch, and it has no default: the
   * constructor stores `undefined` when the field is omitted.
   * NOTE(review): presumably forwarded as Deepgram's `utterance_end_ms`
   * query parameter — confirm against the URL builder.
   */
  utteranceEndMs?: number;
  /** Override the Deepgram WebSocket base URL (useful for proxies or on-prem). */
  baseUrl?: string;
  /** Connect timeout in milliseconds. Default: 10_000. */
  connectTimeoutMs?: number;
  /** Inactivity timeout in milliseconds. Default: 30_000. */
  inactivityTimeoutMs?: number;
  /** Audio sample rate in Hz (default: 16000). Passed through from the client WebSocket connection. */
  sampleRate?: number;
  /**
   * Enable Deepgram's built-in speaker diarization. Default: false.
   *
   * When `true`, the adapter appends `diarize=true` to the Deepgram live
   * URL so Deepgram attaches a `speaker` integer to each word (and
   * sometimes a top-level `speaker` to the alternative). The adapter
   * aggregates per-segment speakers (mode, with first-word tiebreaker)
   * into a single `speakerLabel` emitted on `partial` / `final` events,
   * alongside the alternative's `confidence`. Consumers (e.g. Meet) use
   * this stable-within-session label to bind opaque ASR speakers to real
   * participant identities.
   *
   * Kept off by default so existing non-Meet callers (telephony, chat
   * composer) preserve their current lean URL + response shape.
   */
  diarize?: boolean;
}
109
+
110
+ // ---------------------------------------------------------------------------
111
+ // Deepgram streaming response types (subset relevant to transcript events)
112
+ // ---------------------------------------------------------------------------
113
+
114
/**
 * One word entry inside a Deepgram streaming alternative.
 *
 * With diarization on, `speaker` holds the numeric tag of the detected
 * speaker turn. The tag is stable for the duration of a session but is
 * opaque: Deepgram does not map it to a real-world identity.
 */
interface DeepgramStreamWord {
  /** The recognized word text. */
  word?: string;
  /** Start offset of the word. */
  start?: number;
  /** End offset of the word. */
  end?: number;
  /** Recognition confidence for this word. */
  confidence?: number;
  /** Numeric speaker tag; only present when diarization is enabled. */
  speaker?: number;
}
127
+
128
/**
 * One transcript hypothesis within a Deepgram streaming response.
 *
 * Under `diarize=true` the speaker information arrives per word in
 * `words`. Some API versions additionally place a `speaker` tag directly
 * on the alternative when a single speaker dominates the chunk, so both
 * locations are consulted when a label is derived for the chunk.
 */
interface DeepgramStreamAlternative {
  /** Transcribed text for this hypothesis. */
  transcript?: string;
  /** Confidence reported for this hypothesis. */
  confidence?: number;
  /** Word-level entries, carrying speaker tags when diarization is on. */
  words?: DeepgramStreamWord[];
  /** Dominant-speaker tag; only surfaced by some API versions. */
  speaker?: number;
}
144
+
145
/** Per-channel payload of a Deepgram streaming response. */
interface DeepgramStreamChannel {
  /** Transcript hypotheses for this channel; the adapter reads index 0. */
  alternatives?: DeepgramStreamAlternative[];
}
149
+
150
/**
 * Envelope of a single frame received on the Deepgram streaming socket.
 *
 * Fields that drive event normalization:
 * - `type` — `"Results"` marks a transcript frame, `"Metadata"` carries
 *   session info, and `"UtteranceEnd"` is an endpointing signal.
 * - `is_final` — when true the transcript for this audio segment is
 *   committed and will not be revised; when false it is interim (partial).
 *   A `final` event is emitted only for `is_final: true` frames.
 * - `speech_final` — set when Deepgram's endpointing detects a natural
 *   pause; together with `is_final` it marks a committed utterance boundary.
 */
interface DeepgramStreamResponse {
  type?: string;
  is_final?: boolean;
  speech_final?: boolean;
  channel?: DeepgramStreamChannel;
  channel_index?: number[];
  /** Start offset of the audio segment in seconds. */
  start?: number;
  /** Duration of the audio segment in seconds. */
  duration?: number;
}
173
+
174
+ // ---------------------------------------------------------------------------
175
+ // Minimal WebSocket interface
176
+ // ---------------------------------------------------------------------------
177
+
178
/**
 * The slice of the WebSocket surface this adapter actually touches,
 * declared structurally so tests can substitute a fake without relying
 * on Bun's global WebSocket type.
 */
interface WsLike {
  /** Connection state; compared against {@link WS_OPEN} before sending. */
  readonly readyState: number;
  /** Bytes queued for sending but not yet flushed; used for backpressure. */
  readonly bufferedAmount: number;

  send(data: string | ArrayBufferLike | ArrayBuffer | Uint8Array): void;
  close(code?: number, reason?: string): void;

  addEventListener(type: "open", listener: () => void): void;
  addEventListener(
    type: "close",
    listener: (ev: { code: number; reason: string }) => void,
  ): void;
  addEventListener(type: "error", listener: (ev: unknown) => void): void;
  addEventListener(
    type: "message",
    listener: (ev: { data: unknown }) => void,
  ): void;

  removeEventListener(type: string, listener: unknown): void;
}

/** `readyState` value the adapter treats as "socket is open". */
const WS_OPEN = 1;
201
+
202
+ // ---------------------------------------------------------------------------
203
+ // Adapter implementation
204
+ // ---------------------------------------------------------------------------
205
+
206
+ /**
207
+ * Deepgram realtime streaming transcriber.
208
+ *
209
+ * Implements the daemon {@link StreamingTranscriber} contract on top of
210
+ * Deepgram's live transcription WebSocket API.
211
+ */
212
+ export class DeepgramRealtimeTranscriber implements StreamingTranscriber {
213
+ readonly providerId = "deepgram" as const;
214
+ readonly boundaryId = "daemon-streaming" as const;
215
+
216
+ private readonly apiKey: string;
217
+ private readonly model: string;
218
+ private readonly language: string | undefined;
219
+ private readonly smartFormatting: boolean;
220
+ private readonly interimResults: boolean;
221
+ private readonly utteranceEndMs: number | undefined;
222
+ private readonly baseUrl: string;
223
+ private readonly connectTimeoutMs: number;
224
+ private readonly inactivityTimeoutMs: number;
225
+ private readonly sampleRate: number;
226
+ /**
227
+ * Whether speaker diarization is requested. Forwarded to the Deepgram
228
+ * WebSocket as `diarize=true` and drives speaker-label extraction from
229
+ * Results frames — see {@link DeepgramRealtimeOptions.diarize}.
230
+ */
231
+ private readonly diarize: boolean;
232
+
233
+ /** The live WebSocket connection, set during start(). */
234
+ private ws: WsLike | null = null;
235
+
236
+ /** Callback for emitting events to the session orchestrator. */
237
+ private onEvent: ((event: SttStreamServerEvent) => void) | null = null;
238
+
239
+ /** Whether the session has been fully closed. */
240
+ private closed = false;
241
+
242
+ /** Whether stop() has been called. */
243
+ private stopping = false;
244
+
245
+ /** Inactivity timer handle. */
246
+ private inactivityTimer: ReturnType<typeof setTimeout> | null = null;
247
+
248
+ /** Close grace timer handle. */
249
+ private closeGraceTimer: ReturnType<typeof setTimeout> | null = null;
250
+
251
/**
 * Capture the API key and resolve every option to its effective value.
 *
 * @param apiKey - Deepgram API key, later sent in the `Authorization` header.
 * @param options - Optional overrides; omitted fields fall back to the
 *   module defaults. `language` and `utteranceEndMs` have no default and
 *   stay `undefined` when omitted.
 */
constructor(apiKey: string, options: DeepgramRealtimeOptions = {}) {
  const {
    model,
    language,
    smartFormatting,
    interimResults,
    utteranceEndMs,
    baseUrl,
    connectTimeoutMs,
    inactivityTimeoutMs,
    sampleRate,
    diarize,
  } = options;

  this.apiKey = apiKey;
  this.model = model ?? DEFAULT_MODEL;
  this.language = language;
  this.smartFormatting = smartFormatting ?? true;
  this.interimResults = interimResults ?? true;
  this.utteranceEndMs = utteranceEndMs;

  // Strip trailing slashes so the base URL can be joined with a path later.
  const rawBaseUrl = baseUrl ?? DEFAULT_WS_BASE_URL;
  this.baseUrl = rawBaseUrl.replace(/\/+$/, "");

  this.connectTimeoutMs = connectTimeoutMs ?? DEFAULT_CONNECT_TIMEOUT_MS;
  this.inactivityTimeoutMs =
    inactivityTimeoutMs ?? DEFAULT_INACTIVITY_TIMEOUT_MS;
  this.sampleRate = sampleRate ?? 16_000;
  this.diarize = diarize ?? false;
}
266
+
267
+ // ── StreamingTranscriber interface ──────────────────────────────────
268
+
269
/**
 * Open the Deepgram WebSocket and resolve once the connection is open.
 *
 * The connect phase uses its own temporary open/error/close listeners.
 * They are detached as soon as the handshake settles, so they cannot
 * linger next to the session-lifetime handlers. On any connect failure
 * (timeout, error, or close before open) the socket is closed and
 * released, so a failed attempt does not leak the connection and
 * start() may be called again.
 *
 * @param onEvent - Receives every event the adapter emits for the session.
 * @throws Error if a socket is already held (start() called twice), if
 *   the handshake does not complete within the connect timeout, or if
 *   the socket errors or closes before opening.
 */
async start(onEvent: (event: SttStreamServerEvent) => void): Promise<void> {
  if (this.ws) {
    throw new Error("DeepgramRealtimeTranscriber: start() called twice");
  }
  this.onEvent = onEvent;

  const url = this.buildWebSocketUrl();
  log.info({ url }, "Opening Deepgram realtime session");

  const ws = this.createWebSocket(url);
  this.ws = ws;

  // Wait for the WebSocket to open or fail.
  await new Promise<void>((resolve, reject) => {
    let settled = false;

    // Single exit point: the first outcome wins, later ones are ignored.
    const settle = (failure?: Error): void => {
      if (settled) return;
      settled = true;

      clearTimeout(connectTimer);
      ws.removeEventListener("open", onOpen);
      ws.removeEventListener("error", onError);
      ws.removeEventListener("close", onClose);

      if (failure === undefined) {
        resolve();
        return;
      }

      // Release the failed socket. Skip if it was already released
      // (e.g. stop() ran while the handshake was still pending).
      if (this.ws === ws) {
        this.forceClose();
      }
      reject(failure);
    };

    const onOpen = (): void => {
      settle();
    };

    const onError = (ev: unknown): void => {
      const msg =
        ev instanceof Error
          ? ev.message
          : typeof ev === "object" && ev !== null && "message" in ev
            ? String((ev as { message: unknown }).message)
            : "WebSocket error during connect";
      settle(new Error(`Deepgram realtime connect error: ${msg}`));
    };

    const onClose = (ev: { code: number; reason: string }): void => {
      settle(
        new Error(
          `Deepgram WebSocket closed before open (code=${ev.code}, reason=${ev.reason})`,
        ),
      );
    };

    const connectTimer = setTimeout(() => {
      settle(new Error("Deepgram realtime connect timeout"));
    }, this.connectTimeoutMs);

    ws.addEventListener("open", onOpen);
    ws.addEventListener("error", onError);
    ws.addEventListener("close", onClose);
  });

  // Socket is now open — attach the message/close/error handlers for
  // the active session lifetime.
  this.attachSessionHandlers(ws);
  this.resetInactivityTimer();

  log.info("Deepgram realtime session opened");
}
335
+
336
/**
 * Forward one audio chunk to Deepgram.
 *
 * The chunk is silently skipped when the session is closed or stopping,
 * or when no open socket is held. It is dropped with a warning when the
 * socket's outbound buffer already exceeds MAX_BUFFERED_AMOUNT.
 *
 * @param audio - Raw audio bytes to send.
 * @param _mimeType - Unused by this adapter.
 */
sendAudio(audio: Buffer, _mimeType: string): void {
  const sessionInactive = this.closed || this.stopping;
  if (sessionInactive) return;

  const socket = this.ws;
  if (socket === null || socket.readyState !== WS_OPEN) return;

  // Backpressure: refuse to queue more while the outbound buffer is
  // over the limit, so memory use stays bounded.
  const overLimit = socket.bufferedAmount > MAX_BUFFERED_AMOUNT;
  if (overLimit) {
    log.warn(
      { bufferedAmount: socket.bufferedAmount },
      "Deepgram realtime backpressure: dropping audio frame",
    );
    return;
  }

  // The live endpoint takes raw audio bytes as binary frames.
  const frame = new Uint8Array(audio);
  socket.send(frame);
}
355
+
356
/**
 * Begin a graceful shutdown of the session.
 *
 * Does nothing if the session is already closed or stopping. With an
 * open socket it sends Deepgram's `CloseStream` message and arms a grace
 * timer that force-closes the session after CLOSE_GRACE_MS. Without an
 * open socket, or if the send throws, it closes immediately.
 */
stop(): void {
  const alreadyFinishing = this.closed || this.stopping;
  if (alreadyFinishing) return;
  this.stopping = true;

  log.info("Stopping Deepgram realtime session");

  const socket = this.ws;
  if (socket === null || socket.readyState !== WS_OPEN) {
    // Nothing to signal to — finish right away.
    this.emitClosedAndCleanup();
    return;
  }

  // Tell Deepgram no more audio is coming so it can flush pending finals.
  const closeStreamMessage = JSON.stringify({ type: "CloseStream" });
  try {
    socket.send(closeStreamMessage);
  } catch {
    // The signal could not be delivered; close without waiting.
    this.emitClosedAndCleanup();
    return;
  }

  // Bound the wait for the provider-side close so the session cannot leak.
  const onGraceElapsed = (): void => {
    log.warn("Deepgram realtime close grace timeout — forcing close");
    this.emitClosedAndCleanup();
  };
  this.closeGraceTimer = setTimeout(onGraceElapsed, CLOSE_GRACE_MS);
}
385
+
386
+ // ── WebSocket lifecycle ─────────────────────────────────────────────
387
+
388
/**
 * Build the WebSocket for a session; kept as its own method so tests
 * can replace it.
 *
 * The API key travels in an `Authorization: Token <key>` header. This
 * relies on the runtime's WebSocket constructor accepting a second
 * `options` argument with custom headers (Bun does; the browser API
 * does not).
 *
 * @param url - Fully built Deepgram live-transcription URL.
 * @throws Error if the runtime exposes no global `WebSocket` constructor.
 */
private createWebSocket(url: string): WsLike {
  type HeaderCapableCtor = new (
    url: string,
    options?: { headers?: Record<string, string> },
  ) => WsLike;

  const { WebSocket: SocketCtor } = globalThis as unknown as {
    WebSocket: HeaderCapableCtor;
  };

  if (typeof SocketCtor !== "function") {
    throw new Error("global WebSocket is not available in this runtime");
  }

  const headers = { Authorization: `Token ${this.apiKey}` };
  return new SocketCtor(url, { headers });
}
413
+
414
/**
 * Wire the opened socket's message, close and error events to the
 * adapter's handlers. These listeners stay attached for the rest of the
 * session and feed the event normalization pipeline.
 *
 * @param ws - The socket that has just finished opening.
 */
private attachSessionHandlers(ws: WsLike): void {
  const onMessage = ({ data }: { data: unknown }): void => {
    this.handleProviderMessage(data);
  };
  const onClose = ({ code, reason }: { code: number; reason: string }): void => {
    this.handleProviderClose(code, reason);
  };
  const onError = (ev: unknown): void => {
    this.handleProviderError(ev);
  };

  ws.addEventListener("message", onMessage);
  ws.addEventListener("close", onClose);
  ws.addEventListener("error", onError);
}
432
+
433
+ // ── Provider message handling ───────────────────────────────────────
434
+
435
/**
 * Decode one inbound socket message and route it by Deepgram frame type.
 *
 * Any message received while the session is not closed re-arms the
 * inactivity timer, even if it is later discarded. Payloads that are
 * neither a string nor an ArrayBuffer, that fail JSON parsing, or that
 * do not parse to an object are dropped.
 *
 * @param data - Raw `data` value from the WebSocket message event.
 */
private handleProviderMessage(data: unknown): void {
  if (this.closed) return;

  this.resetInactivityTimer();

  let payload: string;
  if (data instanceof ArrayBuffer) {
    payload = new TextDecoder().decode(data);
  } else if (typeof data === "string") {
    payload = data;
  } else {
    // Some other binary representation — not handled.
    return;
  }

  let parsed: DeepgramStreamResponse;
  try {
    parsed = JSON.parse(payload) as DeepgramStreamResponse;
  } catch {
    log.debug("Dropped non-JSON Deepgram frame");
    return;
  }

  if (!parsed || typeof parsed !== "object") return;

  switch (parsed.type) {
    case "Results":
      // Transcript frame.
      this.handleTranscriptFrame(parsed);
      break;
    case "UtteranceEnd":
      // Endpointing signal with no transcript text. Finals are already
      // emitted on is_final=true, so no extra event is produced here.
      log.debug("Received UtteranceEnd signal");
      break;
    default:
      // Metadata and any other frame types are informational only.
      break;
  }
}
480
+
481
/**
 * Turn a Deepgram `Results` frame into a `partial` or `final` event.
 *
 * How Deepgram's flags are interpreted:
 * - `is_final: true` — the segment's transcript is committed; a `final`
 *   event is emitted.
 * - `is_final: false` — the transcript is interim and may be revised; a
 *   `partial` event is emitted, but only when interim results are enabled.
 * - `speech_final` is not consulted here.
 *
 * The text comes from the first alternative of the frame's channel,
 * trimmed, and is the empty string when no transcript string is present.
 * When {@link DeepgramRealtimeOptions.diarize} is on, a `speakerLabel`
 * is obtained via {@link extractSpeakerLabel}. `confidence` is copied
 * from the first alternative when it is a number. Either field is left
 * off the event when its value is undefined.
 *
 * @param frame - A parsed frame whose `type` is `"Results"`.
 */
private handleTranscriptFrame(frame: DeepgramStreamResponse): void {
  const top = frame.channel?.alternatives?.[0];

  // Silence segments may carry no transcript; normalize those to "".
  const rawTranscript = top?.transcript;
  const text = typeof rawTranscript === "string" ? rawTranscript.trim() : "";

  const speakerLabel = this.diarize ? extractSpeakerLabel(top) : undefined;
  const confidence =
    typeof top?.confidence === "number" ? top.confidence : undefined;

  // Optional fields shared by both event kinds; absent when undefined.
  const extras = {
    ...(speakerLabel !== undefined ? { speakerLabel } : {}),
    ...(confidence !== undefined ? { confidence } : {}),
  };

  if (frame.is_final) {
    // Committed segment.
    this.emitEvent({ type: "final", text, ...extras });
    return;
  }

  if (this.interimResults) {
    // Interim segment.
    this.emitEvent({ type: "partial", text, ...extras });
  }
}
534
+
535
/**
 * React to the provider closing the WebSocket.
 *
 * A close with code 1000 or 1001 that arrives after stop() is treated as
 * the expected end of the session. Every other close is reported as an
 * `error` event first: codes 1008 and 4001 map to `auth`, 1013 maps to
 * `rate-limit`, and all other codes map to `provider-error`. In both
 * cases the session is then closed and cleaned up.
 *
 * @param code - WebSocket close code.
 * @param reason - Close reason text supplied with the close event.
 */
private handleProviderClose(code: number, reason: string): void {
  if (this.closed) return;

  const expectedShutdown = this.stopping && (code === 1000 || code === 1001);
  if (expectedShutdown) {
    log.info({ code, reason }, "Deepgram realtime session closed normally");
    this.emitClosedAndCleanup();
    return;
  }

  log.warn({ code, reason }, "Deepgram realtime session closed unexpectedly");

  let category: "auth" | "rate-limit" | "provider-error";
  switch (code) {
    case 1008:
    case 4001:
      category = "auth";
      break;
    case 1013:
      category = "rate-limit";
      break;
    default:
      category = "provider-error";
      break;
  }

  this.emitEvent({
    type: "error",
    category,
    message: `Deepgram WebSocket closed (code=${code}, reason=${reason})`,
  });
  this.emitClosedAndCleanup();
}
565
+
566
/**
 * React to an `error` event on the session socket: log it, emit a
 * `provider-error` event, then close and clean up the session.
 *
 * @param ev - Whatever the socket passed to its error listener. A
 *   message is taken from an Error instance or from any object with a
 *   `message` property; otherwise a generic message is used.
 */
private handleProviderError(ev: unknown): void {
  if (this.closed) return;

  let detail: string;
  if (ev instanceof Error) {
    detail = ev.message;
  } else if (typeof ev === "object" && ev !== null && "message" in ev) {
    detail = String((ev as { message: unknown }).message);
  } else {
    detail = "WebSocket error";
  }

  log.error({ error: ev }, "Deepgram realtime WebSocket error");

  this.emitEvent({
    type: "error",
    category: "provider-error",
    message: `Deepgram WebSocket error: ${detail}`,
  });
  this.emitClosedAndCleanup();
}
588
+
589
+ // ── Event emission & cleanup ────────────────────────────────────────
590
+
591
/**
 * Deliver one event to the registered session callback, if any.
 *
 * An exception thrown by the callback is logged and suppressed so that
 * a faulty listener cannot take the adapter down with it.
 *
 * @param event - The event to deliver.
 */
private emitEvent(event: SttStreamServerEvent): void {
  if (this.onEvent === null) return;

  try {
    this.onEvent(event);
  } catch (listenerError) {
    log.warn(
      { error: listenerError },
      "Listener error in Deepgram realtime adapter",
    );
  }
}
603
+
604
/**
 * Mark the adapter closed, release timers and the WebSocket, and notify
 * the listener with a final `closed` event.
 *
 * Idempotent: once the adapter is marked closed, later calls return
 * immediately. The listener is detached only after `closed` has been
 * delivered, so `closed` is the last event it receives.
 */
private emitClosedAndCleanup(): void {
  if (!this.closed) {
    // Set the flag first so re-entrant calls made during cleanup are no-ops.
    this.closed = true;

    this.clearTimers();
    this.forceClose();

    this.emitEvent({ type: "closed" });
    this.onEvent = null;
  }
}
618
+
619
/**
 * Drop the reference to the WebSocket and close it, best effort.
 *
 * This method itself emits no events. The reference is cleared before
 * `close()` is attempted, so the adapter no longer holds the socket even
 * if closing throws.
 */
private forceClose(): void {
  const socket = this.ws;
  this.ws = null;

  if (socket) {
    try {
      socket.close();
    } catch {
      // Ignored on purpose: closing an already-closed socket may throw.
    }
  }
}
634
+
635
/**
 * Cancel the inactivity timer and the close-grace timer, if armed, and
 * reset both handles to `null`.
 */
private clearTimers(): void {
  const inactivity = this.inactivityTimer;
  if (inactivity !== null) {
    clearTimeout(inactivity);
    this.inactivityTimer = null;
  }

  const closeGrace = this.closeGraceTimer;
  if (closeGrace !== null) {
    clearTimeout(closeGrace);
    this.closeGraceTimer = null;
  }
}
648
+
649
/**
 * (Re)arm the inactivity timer.
 *
 * Intended to be called when a message arrives from the provider, so that
 * a provider that goes silent is detected. It is deliberately not reset
 * on outbound audio sends: a caller streaming audio continuously must not
 * mask a silent provider.
 *
 * Does nothing when the adapter is closed or stopping. When the timer
 * fires on a still-open adapter, a `timeout` error event is emitted and
 * the adapter is closed and cleaned up.
 */
private resetInactivityTimer(): void {
  if (this.closed || this.stopping) {
    return;
  }

  // Cancel any pending timer before arming a new one.
  const pending = this.inactivityTimer;
  if (pending !== null) {
    clearTimeout(pending);
  }

  const onInactivity = (): void => {
    // The adapter may have closed between arming and firing.
    if (this.closed) {
      return;
    }

    log.warn("Deepgram realtime inactivity timeout");
    this.emitEvent({
      type: "error",
      category: "timeout",
      message: "Deepgram realtime session timed out due to inactivity",
    });
    this.emitClosedAndCleanup();
  };

  this.inactivityTimer = setTimeout(onInactivity, this.inactivityTimeoutMs);
}
673
+
674
+ // ── URL construction ────────────────────────────────────────────────
675
+
676
/**
 * Assemble the WebSocket URL for Deepgram's live transcription endpoint
 * (`/v1/listen`).
 *
 * The model, optional language, feature flags and audio format are all
 * carried as query parameters. Credentials are not part of the URL;
 * authentication is handled separately via the `Authorization` header
 * in {@link createWebSocket}.
 *
 * @returns The base URL followed by `/v1/listen?` and the encoded query.
 */
private buildWebSocketUrl(): string {
  // Pairs are listed in the order they appear in the query string.
  const query: Array<[string, string]> = [["model", this.model]];

  // Optional settings are only sent when configured / enabled.
  if (this.language) {
    query.push(["language", this.language]);
  }
  if (this.smartFormatting) {
    query.push(["smart_format", "true"]);
  }
  if (this.interimResults) {
    query.push(["interim_results", "true"]);
  }
  if (this.utteranceEndMs !== undefined) {
    query.push(["utterance_end_ms", String(this.utteranceEndMs)]);
  }
  if (this.diarize) {
    query.push(["diarize", "true"]);
  }

  query.push(
    // Punctuation is always requested for cleaner transcript output.
    ["punctuate", "true"],
    // Clients send raw PCM, so linear16 mono is always requested.
    ["encoding", "linear16"],
    ["sample_rate", String(this.sampleRate)],
    ["channels", "1"],
  );

  const queryString = new URLSearchParams(query).toString();
  return `${this.baseUrl}/v1/listen?${queryString}`;
}
713
+ }
714
+
715
+ // ---------------------------------------------------------------------------
716
+ // Helpers
717
+ // ---------------------------------------------------------------------------
718
+
719
/**
 * Reduce the speaker information on a diarized chunk to one label.
 *
 * Deepgram can report speakers in two places:
 *   1. A top-level `speaker` on the alternative (some API versions, when
 *      a single voice dominates the chunk).
 *   2. Per-word tags on `alternative.words[].speaker` (the general case).
 *
 * The top-level tag wins when it is a number. Otherwise the speaker that
 * tags the most words is chosen; when several speakers share the highest
 * count, the one that appears earliest in the word list is kept, so short
 * segments that straddle a turn boundary are still attributed
 * deterministically. Words without a numeric `speaker` are ignored.
 *
 * @param alternative - The transcript alternative to inspect, if any.
 * @returns The chosen speaker as `String(speaker)`, matching the
 *   `speakerLabel` contract on {@link SttStreamServerPartialEvent} /
 *   {@link SttStreamServerFinalEvent}, or `undefined` when no speaker
 *   information is available.
 */
function extractSpeakerLabel(
  alternative: DeepgramStreamAlternative | undefined,
): string | undefined {
  if (!alternative) {
    return undefined;
  }

  const topLevelSpeaker = alternative.speaker;
  if (typeof topLevelSpeaker === "number") {
    return String(topLevelSpeaker);
  }

  const wordList = alternative.words;
  if (!Array.isArray(wordList) || wordList.length === 0) {
    return undefined;
  }

  // Tally words per speaker. A Map iterates in first-insertion order, so
  // the earliest-seen speaker is visited first in the scan below.
  const tally = new Map<number, number>();
  for (const entry of wordList) {
    const tag = entry.speaker;
    if (typeof tag === "number") {
      tally.set(tag, (tally.get(tag) ?? 0) + 1);
    }
  }

  // Only a strictly larger count displaces the current winner, so ties
  // resolve in favour of the speaker seen earliest.
  let winner: number | undefined;
  let winnerCount = 0;
  for (const [tag, occurrences] of tally) {
    if (occurrences > winnerCount) {
      winner = tag;
      winnerCount = occurrences;
    }
  }

  return winner === undefined ? undefined : String(winner);
}